runtime: reduce frame size for runtime.cgocallback_gofunc

author Russ Cox <rsc@golang.org>
Tue, 23 Jul 2013 22:40:02 +0000 (18:40 -0400)
committer Russ Cox <rsc@golang.org>
Tue, 23 Jul 2013 22:40:02 +0000 (18:40 -0400)

diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s

index 5238e594370601987aedbdc951abd03f752e3d0a..6b0739b2ec77a17805ba35b83e64f4f170be857d 100644 (file)
--- a/src/pkg/runtime/asm_386.s
+++ b/src/pkg/runtime/asm_386.s
@@ -524,7 +524,7 @@ TEXT runtime·cgocallback(SB),7,$12-12
  
  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
  // See cgocall.c for more details.
-TEXT runtime·cgocallback_gofunc(SB),7,$12-12
+TEXT runtime·cgocallback_gofunc(SB),7,$8-12
         // If m is nil, Go did not create the current thread.
         // Call needm to obtain one for temporary use.
         // In this case, we're running on the thread stack, so there's
@@ -532,13 +532,12 @@ TEXT runtime·cgocallback_gofunc(SB),7,$12-12
         // the linker analysis by using an indirect call through AX.
         get_tls(CX)
  #ifdef GOOS_windows
+       MOVL    $0, BP
         CMPL    CX, $0
-       JNE     3(PC)
-       PUSHL   $0
-       JMP needm
+       JNE     2(PC)
  #endif
         MOVL    m(CX), BP
-       PUSHL   BP
+       MOVL    BP, 4(SP)
         CMPL    BP, $0
         JNE     havem
  needm:
@@ -552,55 +551,42 @@ havem:
         // Save current m->g0->sched.sp on stack and then set it to SP.
         // Save current sp in m->g0->sched.sp in preparation for
         // switch back to m->curg stack.
+       // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
         MOVL    m_g0(BP), SI
-       PUSHL   (g_sched+gobuf_sp)(SI)
+       MOVL    (g_sched+gobuf_sp)(SI), AX
+       MOVL    AX, 0(SP)
         MOVL    SP, (g_sched+gobuf_sp)(SI)
  
-       // Switch to m->curg stack and call runtime.cgocallbackg
-       // with the three arguments.  Because we are taking over
-       // the execution of m->curg but *not* resuming what had
-       // been running, we need to save that information (m->curg->sched)
-       // so that we can restore it when we're done. 
+       // Switch to m->curg stack and call runtime.cgocallbackg.
+       // Because we are taking over the execution of m->curg
+       // but *not* resuming what had been running, we need to
+       // save that information (m->curg->sched) so we can restore it.
         // We can restore m->curg->sched.sp easily, because calling
         // runtime.cgocallbackg leaves SP unchanged upon return.
         // To save m->curg->sched.pc, we push it onto the stack.
         // This has the added benefit that it looks to the traceback
         // routine like cgocallbackg is going to return to that
-       // PC (because we defined cgocallbackg to have
-       // a frame size of 12, the same amount that we use below),
+       // PC (because the frame we allocate below has the same
+       // size as cgocallback_gofunc's frame declared above)
         // so that the traceback will seamlessly trace back into
         // the earlier calls.
-       MOVL    fn+0(FP), AX
-       MOVL    frame+4(FP), BX
-       MOVL    framesize+8(FP), DX
-
+       //
+       // In the new goroutine, 0(SP) and 4(SP) are unused except
+       // on Windows, where they are the SEH block.
         MOVL    m_curg(BP), SI
         MOVL    SI, g(CX)
-       MOVL    (g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
-
-       // Push gobuf.pc
+       MOVL    (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
         MOVL    (g_sched+gobuf_pc)(SI), BP
-       SUBL    $4, DI
-       MOVL    BP, 0(DI)
-
-       // Push arguments to cgocallbackg.
-       // Frame size here must match the frame size above plus the pushes
-       // to trick traceback routines into doing the right thing.
-       SUBL    $20, DI
-       MOVL    AX, 0(DI)
-       MOVL    BX, 4(DI)
-       MOVL    DX, 8(DI)
-       
-       // Switch stack and make the call.
-       MOVL    DI, SP
+       MOVL    BP, -4(DI)
+       LEAL    -(4+8)(DI), SP
         CALL    runtime·cgocallbackg(SB)
  
         // Restore g->sched (== m->curg->sched) from saved values.
         get_tls(CX)
         MOVL    g(CX), SI
-       MOVL    20(SP), BP
+       MOVL    8(SP), BP
         MOVL    BP, (g_sched+gobuf_pc)(SI)
-       LEAL    (20+4)(SP), DI
+       LEAL    (8+4)(SP), DI
         MOVL    DI, (g_sched+gobuf_sp)(SI)
  
         // Switch back to m->g0's stack and restore m->g0->sched.sp.
@@ -610,11 +596,12 @@ havem:
         MOVL    m_g0(BP), SI
         MOVL    SI, g(CX)
         MOVL    (g_sched+gobuf_sp)(SI), SP
-       POPL    (g_sched+gobuf_sp)(SI)
+       MOVL    0(SP), AX
+       MOVL    AX, (g_sched+gobuf_sp)(SI)
         
         // If the m on entry was nil, we called needm above to borrow an m
         // for the duration of the call. Since the call is over, return it with dropm.
-       POPL    BP
+       MOVL    8(SP), BP
         CMPL    BP, $0
         JNE 3(PC)
         MOVL    $runtime·dropm(SB), AX
diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s

index f8f77124d9260e15226a89aadb0b1253b88d11b7..1ec635516dbdd0e9fdd011271d9520f2fb977fa1 100644 (file)
--- a/src/pkg/runtime/asm_amd64.s
+++ b/src/pkg/runtime/asm_amd64.s
@@ -563,7 +563,7 @@ TEXT runtime·cgocallback(SB),7,$24-24
  
  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
  // See cgocall.c for more details.
-TEXT runtime·cgocallback_gofunc(SB),7,$24-24
+TEXT runtime·cgocallback_gofunc(SB),7,$16-24
         // If m is nil, Go did not create the current thread.
         // Call needm to obtain one for temporary use.
         // In this case, we're running on the thread stack, so there's
@@ -571,13 +571,12 @@ TEXT runtime·cgocallback_gofunc(SB),7,$24-24
         // the linker analysis by using an indirect call through AX.
         get_tls(CX)
  #ifdef GOOS_windows
+       MOVL    $0, BP
         CMPQ    CX, $0
-       JNE     3(PC)
-       PUSHQ   $0
-       JMP     needm
+       JNE     2(PC)
  #endif
         MOVQ    m(CX), BP
-       PUSHQ   BP
+       MOVQ    BP, 8(SP)
         CMPQ    BP, $0
         JNE     havem
  needm:
@@ -591,55 +590,42 @@ havem:
         // Save current m->g0->sched.sp on stack and then set it to SP.
         // Save current sp in m->g0->sched.sp in preparation for
         // switch back to m->curg stack.
+       // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
         MOVQ    m_g0(BP), SI
-       PUSHQ   (g_sched+gobuf_sp)(SI)
+       MOVQ    (g_sched+gobuf_sp)(SI), AX
+       MOVQ    AX, 0(SP)
         MOVQ    SP, (g_sched+gobuf_sp)(SI)
  
-       // Switch to m->curg stack and call runtime.cgocallbackg
-       // with the three arguments.  Because we are taking over
-       // the execution of m->curg but *not* resuming what had
-       // been running, we need to save that information (m->curg->sched)
-       // so that we can restore it when we're done. 
+       // Switch to m->curg stack and call runtime.cgocallbackg.
+       // Because we are taking over the execution of m->curg
+       // but *not* resuming what had been running, we need to
+       // save that information (m->curg->sched) so we can restore it.
         // We can restore m->curg->sched.sp easily, because calling
         // runtime.cgocallbackg leaves SP unchanged upon return.
         // To save m->curg->sched.pc, we push it onto the stack.
         // This has the added benefit that it looks to the traceback
         // routine like cgocallbackg is going to return to that
-       // PC (because we defined cgocallbackg to have
-       // a frame size of 24, the same amount that we use below),
+       // PC (because the frame we allocate below has the same
+       // size as cgocallback_gofunc's frame declared above)
         // so that the traceback will seamlessly trace back into
         // the earlier calls.
-       MOVQ    fn+0(FP), AX
-       MOVQ    frame+8(FP), BX
-       MOVQ    framesize+16(FP), DX
-
+       //
+       // In the new goroutine, 0(SP) and 8(SP) are unused except
+       // on Windows, where they are the SEH block.
         MOVQ    m_curg(BP), SI
         MOVQ    SI, g(CX)
         MOVQ    (g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
-
-       // Push gobuf.pc
         MOVQ    (g_sched+gobuf_pc)(SI), BP
-       SUBQ    $8, DI
-       MOVQ    BP, 0(DI)
-
-       // Push arguments to cgocallbackg.
-       // Frame size here must match the frame size above plus the pushes
-       // to trick traceback routines into doing the right thing.
-       SUBQ    $40, DI
-       MOVQ    AX, 0(DI)
-       MOVQ    BX, 8(DI)
-       MOVQ    DX, 16(DI)
-       
-       // Switch stack and make the call.
-       MOVQ    DI, SP
+       MOVQ    BP, -8(DI)
+       LEAQ    -(8+16)(DI), SP
         CALL    runtime·cgocallbackg(SB)
  
         // Restore g->sched (== m->curg->sched) from saved values.
         get_tls(CX)
         MOVQ    g(CX), SI
-       MOVQ    40(SP), BP
+       MOVQ    16(SP), BP
         MOVQ    BP, (g_sched+gobuf_pc)(SI)
-       LEAQ    (40+8)(SP), DI
+       LEAQ    (16+8)(SP), DI
         MOVQ    DI, (g_sched+gobuf_sp)(SI)
  
         // Switch back to m->g0's stack and restore m->g0->sched.sp.
@@ -649,11 +635,12 @@ havem:
         MOVQ    m_g0(BP), SI
         MOVQ    SI, g(CX)
         MOVQ    (g_sched+gobuf_sp)(SI), SP
-       POPQ    (g_sched+gobuf_sp)(SI)
+       MOVQ    0(SP), AX
+       MOVQ    AX, (g_sched+gobuf_sp)(SI)
         
         // If the m on entry was nil, we called needm above to borrow an m
         // for the duration of the call. Since the call is over, return it with dropm.
-       POPQ    BP
+       MOVQ    8(SP), BP
         CMPQ    BP, $0
         JNE 3(PC)
         MOVQ    $runtime·dropm(SB), AX
diff --git a/src/pkg/runtime/asm_arm.s b/src/pkg/runtime/asm_arm.s

index be6d29b567cb5cffa4b20eca83788463032b7d81..863d9a50647ad4c4c211bf0c252641f9f32e8e16 100644 (file)
--- a/src/pkg/runtime/asm_arm.s
+++ b/src/pkg/runtime/asm_arm.s
@@ -331,7 +331,7 @@ TEXT runtime·cgocallback(SB),7,$12-12
  
  // cgocallback_gofunc(void (*fn)(void*), void *frame, uintptr framesize)
  // See cgocall.c for more details.
-TEXT   runtime·cgocallback_gofunc(SB),7,$12-12
+TEXT   runtime·cgocallback_gofunc(SB),7,$8-12
         // Load m and g from thread-local storage.
         MOVW    _cgo_load_gm(SB), R0
         CMP     $0, R0
@@ -342,7 +342,7 @@ TEXT        runtime·cgocallback_gofunc(SB),7,$12-12
         // In this case, we're running on the thread stack, so there's
         // lots of space, but the linker doesn't know. Hide the call from
         // the linker analysis by using an indirect call.
-       MOVW    m, savedm-12(SP)
+       MOVW    m, savedm-4(SP)
         CMP     $0, m
         B.NE havem
         MOVW    $runtime·needm(SB), R0
@@ -353,51 +353,41 @@ havem:
         // Save current m->g0->sched.sp on stack and then set it to SP.
         // Save current sp in m->g0->sched.sp in preparation for
         // switch back to m->curg stack.
+       // NOTE: unwindm knows that the saved g->sched.sp is at 4(R13) aka savedsp-8(SP).
         MOVW    m_g0(m), R3
         MOVW    (g_sched+gobuf_sp)(R3), R4
-       MOVW.W  R4, -4(R13)
+       MOVW    R4, savedsp-8(SP)
         MOVW    R13, (g_sched+gobuf_sp)(R3)
  
-       // Switch to m->curg stack and call runtime.cgocallbackg
-       // with the three arguments.  Because we are taking over
-       // the execution of m->curg but *not* resuming what had
-       // been running, we need to save that information (m->curg->sched)
-       // so that we can restore it when we're done. 
+       // Switch to m->curg stack and call runtime.cgocallbackg.
+       // Because we are taking over the execution of m->curg
+       // but *not* resuming what had been running, we need to
+       // save that information (m->curg->sched) so we can restore it.
         // We can restore m->curg->sched.sp easily, because calling
         // runtime.cgocallbackg leaves SP unchanged upon return.
         // To save m->curg->sched.pc, we push it onto the stack.
         // This has the added benefit that it looks to the traceback
         // routine like cgocallbackg is going to return to that
-       // PC (because we defined cgocallbackg to have
-       // a frame size of 12, the same amount that we use below),
+       // PC (because the frame we allocate below has the same
+       // size as cgocallback_gofunc's frame declared above)
         // so that the traceback will seamlessly trace back into
         // the earlier calls.
+       //
+       // In the new goroutine, -8(SP) and -4(SP) are unused.
         MOVW    fn+4(FP), R0
         MOVW    frame+8(FP), R1
         MOVW    framesize+12(FP), R2
-
         MOVW    m_curg(m), g
         MOVW    (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
-
-       // Push gobuf.pc
-       // Frame size here must match the frame size above plus the push
-       // to trick traceback routines into doing the right thing.
         MOVW    (g_sched+gobuf_pc)(g), R5
-       MOVW.W  R5, -20(R4)
-
-       // Push arguments to cgocallbackg.
-       MOVW    R0, 4(R4)
-       MOVW    R1, 8(R4)
-       MOVW    R2, 12(R4)
-       
-       // Switch stack and make the call.
-       MOVW    R4, R13
+       MOVW    R5, -12(R4)
+       MOVW    $-12(R4), R13
         BL      runtime·cgocallbackg(SB)
  
         // Restore g->sched (== m->curg->sched) from saved values.
         MOVW    0(R13), R5
         MOVW    R5, (g_sched+gobuf_pc)(g)
-       ADD     $(16+4), R13, R4
+       MOVW    $12(R13), R4
         MOVW    R4, (g_sched+gobuf_sp)(g)
  
         // Switch back to m->g0's stack and restore m->g0->sched.sp.
@@ -405,14 +395,12 @@ havem:
         // so we do not have to restore it.)
         MOVW    m_g0(m), g
         MOVW    (g_sched+gobuf_sp)(g), R13
-       // POP R6
-       MOVW    0(R13), R6
-       ADD     $4, R13
-       MOVW    R6, (g_sched+gobuf_sp)(g)
+       MOVW    savedsp-8(SP), R4
+       MOVW    R4, (g_sched+gobuf_sp)(g)
  
         // If the m on entry was nil, we called needm above to borrow an m
         // for the duration of the call. Since the call is over, return it with dropm.
-       MOVW    savedm-12(SP), R6
+       MOVW    savedm-4(SP), R6
         CMP     $0, R6
         B.NE    3(PC)
         MOVW    $runtime·dropm(SB), R0
diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c

index 16bc76554911a7932fd22b848b7b6741702e6f92..a624509cfef2cf20d01e943430d48eea9ff81fbf 100644 (file)
--- a/src/pkg/runtime/cgocall.c
+++ b/src/pkg/runtime/cgocall.c
@@ -228,13 +228,25 @@ runtime·cfree(void *p)
  
  static FuncVal unwindmf = {unwindm};
  
+typedef struct CallbackArgs CallbackArgs;
+struct CallbackArgs
+{
+       FuncVal *fn;
+       void *arg;
+       uintptr argsize;
+};
+
+#define CBARGS (CallbackArgs*)((byte*)m->g0->sched.sp+(3+(thechar=='5'))*sizeof(void*))
+
  void
-runtime·cgocallbackg(FuncVal *fn, void *arg, uintptr argsize)
+runtime·cgocallbackg(void)
  {
         Defer d;
+       CallbackArgs *cb;
  
         if(m->racecall) {
-               reflect·call(fn, arg, argsize);
+               cb = CBARGS;
+               reflect·call(cb->fn, cb->arg, cb->argsize);
                 return;
         }
  
@@ -261,7 +273,8 @@ runtime·cgocallbackg(FuncVal *fn, void *arg, uintptr argsize)
                 runtime·raceacquire(&cgosync);
  
         // Invoke callback.
-       reflect·call(fn, arg, argsize);
+       cb = CBARGS;
+       reflect·call(cb->fn, cb->arg, cb->argsize);
  
         if(raceenabled)
                 runtime·racereleasemerge(&cgosync);
@@ -286,9 +299,11 @@ unwindm(void)
                 runtime·throw("runtime: unwindm not implemented");
         case '8':
         case '6':
-       case '5':
                 m->g0->sched.sp = *(uintptr*)m->g0->sched.sp;
                 break;
+       case '5':
+               m->g0->sched.sp = *(uintptr*)((byte*)m->g0->sched.sp + 4);
+               break;
         }
  }
  
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c

index 9530b9984af9660e066a4c9aa991c1741577bd05..3ae9fe7273cb3a654d1e199385b526accae3f617 100644 (file)
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -651,10 +651,10 @@ runtime·needm(byte x)
         g->stackguard0 = g->stackguard;
  
         // On windows/386, we need to put an SEH frame (two words)
-       // somewhere on the current stack. We are called
-       // from needm, and we know there is some available
-       // space one word into the argument frame. Use that.
-       m->seh = (SEH*)((uintptr*)&x + 1);
+       // somewhere on the current stack. We are called from cgocallback_gofunc
+       // and we know that it will leave two unused words below m->curg->sched.sp.
+       // Use those.
+       m->seh = (SEH*)((uintptr*)m->curg->sched.sp - 3);
  
         // Initialize this thread to use the m.
         runtime·asminit();
author	Russ Cox <rsc@golang.org>
	Tue, 23 Jul 2013 22:40:02 +0000 (18:40 -0400)
committer	Russ Cox <rsc@golang.org>
	Tue, 23 Jul 2013 22:40:02 +0000 (18:40 -0400)
src/pkg/runtime/asm_386.s		patch | blob | history
src/pkg/runtime/asm_amd64.s		patch | blob | history
src/pkg/runtime/asm_arm.s		patch | blob | history
src/pkg/runtime/cgocall.c		patch | blob | history
src/pkg/runtime/proc.c		patch | blob | history