]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/5l, cmd/6l, cmd/8l: refactor stack split code
authorRuss Cox <rsc@golang.org>
Thu, 12 Sep 2013 00:29:45 +0000 (20:29 -0400)
committerRuss Cox <rsc@golang.org>
Thu, 12 Sep 2013 00:29:45 +0000 (20:29 -0400)
Pull the stack split generation into its own function.
This will make an upcoming change to fix recover
easier to digest.

R=ken2
CC=golang-dev
https://golang.org/cl/13611044

src/cmd/5l/noop.c
src/cmd/6l/pass.c
src/cmd/8l/pass.c

index 44f4c22cf27cb055ddf8f4b23e299bd44925e9cd..7b63aa715675b7dcdd8b53646cdc7f48a9752f81 100644 (file)
@@ -38,6 +38,10 @@ static       Sym*    sym_div;
 static Sym*    sym_divu;
 static Sym*    sym_mod;
 static Sym*    sym_modu;
+static Sym*    symmorestack;
+static Prog*   pmorestack;
+
+static Prog*   stacksplit(Prog*, int32);
 
 static void
 linkcase(Prog *casep)
@@ -58,9 +62,7 @@ noops(void)
 {
        Prog *p, *q, *q1, *q2;
        int o;
-       int32 arg;
-       Prog *pmorestack;
-       Sym *symmorestack, *tlsfallback, *gmsym;
+       Sym *tlsfallback, *gmsym;
 
        /*
         * find leaf subroutines
@@ -256,136 +258,8 @@ noops(void)
                                                break;
                                }
        
-                               if(!(p->reg & NOSPLIT)) {
-                                       // MOVW                 g_stackguard(g), R1
-                                       p = appendp(p);
-                                       p->as = AMOVW;
-                                       p->from.type = D_OREG;
-                                       p->from.reg = REGG;
-                                       p->to.type = D_REG;
-                                       p->to.reg = 1;
-                                       
-                                       if(autosize <= StackSmall) {
-                                               // small stack: SP < stackguard
-                                               //      CMP     stackguard, SP
-                                               p = appendp(p);
-                                               p->as = ACMP;
-                                               p->from.type = D_REG;
-                                               p->from.reg = 1;
-                                               p->reg = REGSP;
-                                       } else if(autosize <= StackBig) {
-                                               // large stack: SP-framesize < stackguard-StackSmall
-                                               //      MOVW $-autosize(SP), R2
-                                               //      CMP stackguard, R2
-                                               p = appendp(p);
-                                               p->as = AMOVW;
-                                               p->from.type = D_CONST;
-                                               p->from.reg = REGSP;
-                                               p->from.offset = -autosize;
-                                               p->to.type = D_REG;
-                                               p->to.reg = 2;
-                                               
-                                               p = appendp(p);
-                                               p->as = ACMP;
-                                               p->from.type = D_REG;
-                                               p->from.reg = 1;
-                                               p->reg = 2;
-                                       } else {
-                                               // Such a large stack we need to protect against wraparound
-                                               // if SP is close to zero.
-                                               //      SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
-                                               // The +StackGuard on both sides is required to keep the left side positive:
-                                               // SP is allowed to be slightly below stackguard. See stack.h.
-                                               //      CMP $StackPreempt, R1
-                                               //      MOVW.NE $StackGuard(SP), R2
-                                               //      SUB.NE R1, R2
-                                               //      MOVW.NE $(autosize+(StackGuard-StackSmall)), R3
-                                               //      CMP.NE R3, R2
-                                               p = appendp(p);
-                                               p->as = ACMP;
-                                               p->from.type = D_CONST;
-                                               p->from.offset = (uint32)StackPreempt;
-                                               p->reg = 1;
-
-                                               p = appendp(p);
-                                               p->as = AMOVW;
-                                               p->from.type = D_CONST;
-                                               p->from.reg = REGSP;
-                                               p->from.offset = StackGuard;
-                                               p->to.type = D_REG;
-                                               p->to.reg = 2;
-                                               p->scond = C_SCOND_NE;
-                                               
-                                               p = appendp(p);
-                                               p->as = ASUB;
-                                               p->from.type = D_REG;
-                                               p->from.reg = 1;
-                                               p->to.type = D_REG;
-                                               p->to.reg = 2;
-                                               p->scond = C_SCOND_NE;
-                                               
-                                               p = appendp(p);
-                                               p->as = AMOVW;
-                                               p->from.type = D_CONST;
-                                               p->from.offset = autosize + (StackGuard - StackSmall);
-                                               p->to.type = D_REG;
-                                               p->to.reg = 3;
-                                               p->scond = C_SCOND_NE;
-                                               
-                                               p = appendp(p);
-                                               p->as = ACMP;
-                                               p->from.type = D_REG;
-                                               p->from.reg = 3;
-                                               p->reg = 2;
-                                               p->scond = C_SCOND_NE;
-                                       }
-                                       
-                                       // MOVW.LS              $autosize, R1
-                                       p = appendp(p);
-                                       p->as = AMOVW;
-                                       p->scond = C_SCOND_LS;
-                                       p->from.type = D_CONST;
-                                       p->from.offset = autosize;
-                                       p->to.type = D_REG;
-                                       p->to.reg = 1;
-       
-                                       // MOVW.LS              $args, R2
-                                       p = appendp(p);
-                                       p->as = AMOVW;
-                                       p->scond = C_SCOND_LS;
-                                       p->from.type = D_CONST;
-                                       arg = cursym->text->to.offset2;
-                                       if(arg == 1) // special marker for known 0
-                                               arg = 0;
-                                       if(arg&3)
-                                               diag("misaligned argument size in stack split");
-                                       p->from.offset = arg;
-                                       p->to.type = D_REG;
-                                       p->to.reg = 2;
-       
-                                       // MOVW.LS      R14, R3
-                                       p = appendp(p);
-                                       p->as = AMOVW;
-                                       p->scond = C_SCOND_LS;
-                                       p->from.type = D_REG;
-                                       p->from.reg = REGLINK;
-                                       p->to.type = D_REG;
-                                       p->to.reg = 3;
-       
-                                       // BL.LS                runtime.morestack(SB) // modifies LR, returns with LO still asserted
-                                       p = appendp(p);
-                                       p->as = ABL;
-                                       p->scond = C_SCOND_LS;
-                                       p->to.type = D_BRANCH;
-                                       p->to.sym = symmorestack;
-                                       p->cond = pmorestack;
-                                       
-                                       // BLS  start
-                                       p = appendp(p);
-                                       p->as = ABLS;
-                                       p->to.type = D_BRANCH;
-                                       p->cond = cursym->text->link;
-                               }
+                               if(!(p->reg & NOSPLIT))
+                                       p = stacksplit(p, autosize); // emit split check
                                
                                // MOVW.W               R14,$-autosize(SP)
                                p = appendp(p);
@@ -554,6 +428,143 @@ noops(void)
        }
 }
 
+static Prog*
+stacksplit(Prog *p, int32 framesize)
+{
+       int32 arg;
+
+       // MOVW                 g_stackguard(g), R1
+       p = appendp(p);
+       p->as = AMOVW;
+       p->from.type = D_OREG;
+       p->from.reg = REGG;
+       p->to.type = D_REG;
+       p->to.reg = 1;
+       
+       if(framesize <= StackSmall) {
+               // small stack: SP < stackguard
+               //      CMP     stackguard, SP
+               p = appendp(p);
+               p->as = ACMP;
+               p->from.type = D_REG;
+               p->from.reg = 1;
+               p->reg = REGSP;
+       } else if(framesize <= StackBig) {
+               // large stack: SP-framesize < stackguard-StackSmall
+               //      MOVW $-framesize(SP), R2
+               //      CMP stackguard, R2
+               p = appendp(p);
+               p->as = AMOVW;
+               p->from.type = D_CONST;
+               p->from.reg = REGSP;
+               p->from.offset = -framesize;
+               p->to.type = D_REG;
+               p->to.reg = 2;
+               
+               p = appendp(p);
+               p->as = ACMP;
+               p->from.type = D_REG;
+               p->from.reg = 1;
+               p->reg = 2;
+       } else {
+               // Such a large stack we need to protect against wraparound
+               // if SP is close to zero.
+               //      SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
+               // The +StackGuard on both sides is required to keep the left side positive:
+               // SP is allowed to be slightly below stackguard. See stack.h.
+               //      CMP $StackPreempt, R1
+               //      MOVW.NE $StackGuard(SP), R2
+               //      SUB.NE R1, R2
+               //      MOVW.NE $(framesize+(StackGuard-StackSmall)), R3
+               //      CMP.NE R3, R2
+               p = appendp(p);
+               p->as = ACMP;
+               p->from.type = D_CONST;
+               p->from.offset = (uint32)StackPreempt;
+               p->reg = 1;
+
+               p = appendp(p);
+               p->as = AMOVW;
+               p->from.type = D_CONST;
+               p->from.reg = REGSP;
+               p->from.offset = StackGuard;
+               p->to.type = D_REG;
+               p->to.reg = 2;
+               p->scond = C_SCOND_NE;
+               
+               p = appendp(p);
+               p->as = ASUB;
+               p->from.type = D_REG;
+               p->from.reg = 1;
+               p->to.type = D_REG;
+               p->to.reg = 2;
+               p->scond = C_SCOND_NE;
+               
+               p = appendp(p);
+               p->as = AMOVW;
+               p->from.type = D_CONST;
+               p->from.offset = framesize + (StackGuard - StackSmall);
+               p->to.type = D_REG;
+               p->to.reg = 3;
+               p->scond = C_SCOND_NE;
+               
+               p = appendp(p);
+               p->as = ACMP;
+               p->from.type = D_REG;
+               p->from.reg = 3;
+               p->reg = 2;
+               p->scond = C_SCOND_NE;
+       }
+       
+       // MOVW.LS              $framesize, R1
+       p = appendp(p);
+       p->as = AMOVW;
+       p->scond = C_SCOND_LS;
+       p->from.type = D_CONST;
+       p->from.offset = framesize;
+       p->to.type = D_REG;
+       p->to.reg = 1;
+
+       // MOVW.LS              $args, R2
+       p = appendp(p);
+       p->as = AMOVW;
+       p->scond = C_SCOND_LS;
+       p->from.type = D_CONST;
+       arg = cursym->text->to.offset2;
+       if(arg == 1) // special marker for known 0
+               arg = 0;
+       if(arg&3)
+               diag("misaligned argument size in stack split");
+       p->from.offset = arg;
+       p->to.type = D_REG;
+       p->to.reg = 2;
+
+       // MOVW.LS      R14, R3
+       p = appendp(p);
+       p->as = AMOVW;
+       p->scond = C_SCOND_LS;
+       p->from.type = D_REG;
+       p->from.reg = REGLINK;
+       p->to.type = D_REG;
+       p->to.reg = 3;
+
+       // BL.LS                runtime.morestack(SB) // modifies LR, returns with LO still asserted
+       p = appendp(p);
+       p->as = ABL;
+       p->scond = C_SCOND_LS;
+       p->to.type = D_BRANCH;
+       p->to.sym = symmorestack;
+       p->cond = pmorestack;
+       
+       // BLS  start
+       p = appendp(p);
+       p->as = ABLS;
+       p->to.type = D_BRANCH;
+       p->cond = cursym->text->link;
+       
+       return p;
+}
+
 static void
 sigdiv(char *n)
 {
index be1bc4f07002cb429b453142dcb28aad7c0d969b..d24672432fafecd0d95bcdd6fb69c870af65dba6 100644 (file)
@@ -271,8 +271,9 @@ patch(void)
 {
        int32 c;
        Prog *p, *q;
-       Sym *s, *gmsym;
+       Sym *s;
        int32 vexit;
+       Sym *gmsym;
 
        if(debug['v'])
                Bprint(&bso, "%5.2f mkfwd\n", cputime());
@@ -467,6 +468,10 @@ morename[] =
 };
 Prog*  pmorestack[nelem(morename)];
 Sym*   symmorestack[nelem(morename)];
+Sym*   gmsym;
+
+static Prog*   load_g_cx(Prog*);
+static Prog*   stacksplit(Prog*, int32, Prog**);
 
 void
 dostkoff(void)
@@ -474,9 +479,7 @@ dostkoff(void)
        Prog *p, *q, *q1;
        int32 autoffset, deltasp;
        int a, pcsize;
-       uint32 moreconst1, moreconst2, i;
-       Sym *gmsym;
-
+       uint32 i;
 
        gmsym = lookup("runtime.tlsgm", 0);
        for(i=0; i<nelem(morename); i++) {
@@ -504,240 +507,16 @@ dostkoff(void)
                noleaf:;
                }
 
-               q = P;
-               q1 = P;
                if((p->from.scale & NOSPLIT) && autoffset >= StackSmall)
                        diag("nosplit func likely to overflow stack");
 
+               q = P;
                if(!(p->from.scale & NOSPLIT)) {
-                       if(flag_shared) {
-                               // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX
-                               p = appendp(p);
-                               p->as = AMOVQ;
-                               p->from.type = D_EXTERN;
-                               p->from.sym = gmsym;
-                               p->to.type = D_CX;
-                       }
-                       p = appendp(p); // load g into CX
-                       p->as = AMOVQ;
-                       if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd
-                       || HEADTYPE == Hopenbsd || HEADTYPE == Hnetbsd
-                       || HEADTYPE == Hplan9x64 || HEADTYPE == Hdragonfly)
-                               // ELF uses FS
-                               p->from.type = D_INDIR+D_FS;
-                       else
-                               p->from.type = D_INDIR+D_GS;
-                       if(flag_shared) {
-                               // Add TLS offset stored in CX
-                               p->from.index = p->from.type - D_INDIR;
-                               p->from.type = D_INDIR + D_CX;
-                       }
-                       p->from.offset = tlsoffset+0;
-                       p->to.type = D_CX;
-                       if(HEADTYPE == Hwindows) {
-                               // movq %gs:0x28, %rcx
-                               // movq (%rcx), %rcx
-                               p->as = AMOVQ;
-                               p->from.type = D_INDIR+D_GS;
-                               p->from.offset = 0x28;
-                               p->to.type = D_CX;
-
-                       
-                               p = appendp(p);
-                               p->as = AMOVQ;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = 0;
-                               p->to.type = D_CX;
-                       }
-
-                       if(debug['K']) {
-                               // 6l -K means check not only for stack
-                               // overflow but stack underflow.
-                               // On underflow, INT 3 (breakpoint).
-                               // Underflow itself is rare but this also
-                               // catches out-of-sync stack guard info
-
-                               p = appendp(p);
-                               p->as = ACMPQ;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = 8;
-                               p->to.type = D_SP;
-
-                               p = appendp(p);
-                               p->as = AJHI;
-                               p->to.type = D_BRANCH;
-                               p->to.offset = 4;
-                               q1 = p;
-
-                               p = appendp(p);
-                               p->as = AINT;
-                               p->from.type = D_CONST;
-                               p->from.offset = 3;
-
-                               p = appendp(p);
-                               p->as = ANOP;
-                               q1->pcond = p;
-                       }
-
-                       q1 = P;
-                       if(autoffset <= StackSmall) {
-                               // small stack: SP <= stackguard
-                               //      CMPQ SP, stackguard
-                               p = appendp(p);
-                               p->as = ACMPQ;
-                               p->from.type = D_SP;
-                               p->to.type = D_INDIR+D_CX;
-                       } else if(autoffset <= StackBig) {
-                               // large stack: SP-framesize <= stackguard-StackSmall
-                               //      LEAQ -xxx(SP), AX
-                               //      CMPQ AX, stackguard
-                               p = appendp(p);
-                               p->as = ALEAQ;
-                               p->from.type = D_INDIR+D_SP;
-                               p->from.offset = -(autoffset-StackSmall);
-                               p->to.type = D_AX;
-
-                               p = appendp(p);
-                               p->as = ACMPQ;
-                               p->from.type = D_AX;
-                               p->to.type = D_INDIR+D_CX;
-                       } else {
-                               // Such a large stack we need to protect against wraparound.
-                               // If SP is close to zero:
-                               //      SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
-                               // The +StackGuard on both sides is required to keep the left side positive:
-                               // SP is allowed to be slightly below stackguard. See stack.h.
-                               //
-                               // Preemption sets stackguard to StackPreempt, a very large value.
-                               // That breaks the math above, so we have to check for that explicitly.
-                               //      MOVQ    stackguard, CX
-                               //      CMPQ    CX, $StackPreempt
-                               //      JEQ     label-of-call-to-morestack
-                               //      LEAQ    StackGuard(SP), AX
-                               //      SUBQ    CX, AX
-                               //      CMPQ    AX, $(autoffset+(StackGuard-StackSmall))
-
-                               p = appendp(p);
-                               p->as = AMOVQ;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = 0;
-                               p->to.type = D_SI;
-
-                               p = appendp(p);
-                               p->as = ACMPQ;
-                               p->from.type = D_SI;
-                               p->to.type = D_CONST;
-                               p->to.offset = StackPreempt;
-
-                               p = appendp(p);
-                               p->as = AJEQ;
-                               p->to.type = D_BRANCH;
-                               q1 = p;
-
-                               p = appendp(p);
-                               p->as = ALEAQ;
-                               p->from.type = D_INDIR+D_SP;
-                               p->from.offset = StackGuard;
-                               p->to.type = D_AX;
-                               
-                               p = appendp(p);
-                               p->as = ASUBQ;
-                               p->from.type = D_SI;
-                               p->to.type = D_AX;
-                               
-                               p = appendp(p);
-                               p->as = ACMPQ;
-                               p->from.type = D_AX;
-                               p->to.type = D_CONST;
-                               p->to.offset = autoffset+(StackGuard-StackSmall);
-                       }                                       
-
-                       // common
-                       p = appendp(p);
-                       p->as = AJHI;
-                       p->to.type = D_BRANCH;
-                       q = p;
-
-                       // If we ask for more stack, we'll get a minimum of StackMin bytes.
-                       // We need a stack frame large enough to hold the top-of-stack data,
-                       // the function arguments+results, our caller's PC, our frame,
-                       // a word for the return PC of the next call, and then the StackLimit bytes
-                       // that must be available on entry to any function called from a function
-                       // that did a stack check.  If StackMin is enough, don't ask for a specific
-                       // amount: then we can use the custom functions and save a few
-                       // instructions.
-                       moreconst1 = 0;
-                       if(StackTop + textarg + PtrSize + autoffset + PtrSize + StackLimit >= StackMin)
-                               moreconst1 = autoffset;
-                       moreconst2 = textarg;
-                       if(moreconst2 == 1) // special marker
-                               moreconst2 = 0;
-                       if((moreconst2&7) != 0)
-                               diag("misaligned argument size in stack split");
-                       // 4 varieties varieties (const1==0 cross const2==0)
-                       // and 6 subvarieties of (const1==0 and const2!=0)
                        p = appendp(p);
-                       if(moreconst1 == 0 && moreconst2 == 0) {
-                               p->as = ACALL;
-                               p->to.type = D_BRANCH;
-                               p->pcond = pmorestack[0];
-                               p->to.sym = symmorestack[0];
-                       } else
-                       if(moreconst1 != 0 && moreconst2 == 0) {
-                               p->as = AMOVL;
-                               p->from.type = D_CONST;
-                               p->from.offset = moreconst1;
-                               p->to.type = D_AX;
-
-                               p = appendp(p);
-                               p->as = ACALL;
-                               p->to.type = D_BRANCH;
-                               p->pcond = pmorestack[1];
-                               p->to.sym = symmorestack[1];
-                       } else
-                       if(moreconst1 == 0 && moreconst2 <= 48 && moreconst2%8 == 0) {
-                               i = moreconst2/8 + 3;
-                               p->as = ACALL;
-                               p->to.type = D_BRANCH;
-                               p->pcond = pmorestack[i];
-                               p->to.sym = symmorestack[i];
-                       } else
-                       if(moreconst1 == 0 && moreconst2 != 0) {
-                               p->as = AMOVL;
-                               p->from.type = D_CONST;
-                               p->from.offset = moreconst2;
-                               p->to.type = D_AX;
-
-                               p = appendp(p);
-                               p->as = ACALL;
-                               p->to.type = D_BRANCH;
-                               p->pcond = pmorestack[2];
-                               p->to.sym = symmorestack[2];
-                       } else {
-                               p->as = AMOVQ;
-                               p->from.type = D_CONST;
-                               p->from.offset = (uint64)moreconst2 << 32;
-                               p->from.offset |= moreconst1;
-                               p->to.type = D_AX;
-
-                               p = appendp(p);
-                               p->as = ACALL;
-                               p->to.type = D_BRANCH;
-                               p->pcond = pmorestack[3];
-                               p->to.sym = symmorestack[3];
-                       }
-                       
-                       p = appendp(p);
-                       p->as = AJMP;
-                       p->to.type = D_BRANCH;
-                       p->pcond = cursym->text->link;
+                       p = load_g_cx(p); // load g into CX
+                       p = stacksplit(p, autoffset, &q); // emit split check
                }
 
-               if(q != P)
-                       q->pcond = p->link;
-               if(q1 != P)
-                       q1->pcond = q->link;
-
                if(autoffset) {
                        p = appendp(p);
                        p->as = AADJSP;
@@ -895,6 +674,258 @@ dostkoff(void)
        }
 }
 
+// Append code to p to load g into cx.
+// Overwrites p with the first instruction (no first appendp).
+// Overwriting p is unusual but it lets use this in both the
+// prologue (caller must call appendp first) and in the epilogue.
+// Returns last new instruction.
+static Prog*
+load_g_cx(Prog *p)
+{
+       if(flag_shared) {
+               // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX
+               p->as = AMOVQ;
+               p->from.type = D_EXTERN;
+               p->from.sym = gmsym;
+               p->to.type = D_CX;
+               p = appendp(p);
+       }
+       p->as = AMOVQ;
+       if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd
+       || HEADTYPE == Hopenbsd || HEADTYPE == Hnetbsd
+       || HEADTYPE == Hplan9x64 || HEADTYPE == Hdragonfly)
+               // ELF uses FS
+               p->from.type = D_INDIR+D_FS;
+       else
+               p->from.type = D_INDIR+D_GS;
+       if(flag_shared) {
+               // Add TLS offset stored in CX
+               p->from.index = p->from.type - D_INDIR;
+               p->from.type = D_INDIR + D_CX;
+       }
+       p->from.offset = tlsoffset+0;
+       p->to.type = D_CX;
+       if(HEADTYPE == Hwindows) {
+               // movq %gs:0x28, %rcx
+               // movq (%rcx), %rcx
+               p->as = AMOVQ;
+               p->from.type = D_INDIR+D_GS;
+               p->from.offset = 0x28;
+               p->to.type = D_CX;
+
+               p = appendp(p);
+               p->as = AMOVQ;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = 0;
+               p->to.type = D_CX;
+       }
+       return p;
+}
+
+// Append code to p to check for stack split.
+// Appends to (does not overwrite) p.
+// Assumes g is in CX.
+// Returns last new instruction.
+// On return, *jmpok is the instruction that should jump
+// to the stack frame allocation if no split is needed.
+static Prog*
+stacksplit(Prog *p, int32 framesize, Prog **jmpok)
+{
+       Prog *q, *q1;
+       uint32 moreconst1, moreconst2, i;
+
+       if(debug['K']) {
+               // 6l -K means check not only for stack
+               // overflow but stack underflow.
+               // On underflow, INT 3 (breakpoint).
+               // Underflow itself is rare but this also
+               // catches out-of-sync stack guard info
+
+               p = appendp(p);
+               p->as = ACMPQ;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = 8;
+               p->to.type = D_SP;
+
+               p = appendp(p);
+               p->as = AJHI;
+               p->to.type = D_BRANCH;
+               p->to.offset = 4;
+               q1 = p;
+
+               p = appendp(p);
+               p->as = AINT;
+               p->from.type = D_CONST;
+               p->from.offset = 3;
+
+               p = appendp(p);
+               p->as = ANOP;
+               q1->pcond = p;
+       }
+
+       q = P;
+       q1 = P;
+       if(framesize <= StackSmall) {
+               // small stack: SP <= stackguard
+               //      CMPQ SP, stackguard
+               p = appendp(p);
+               p->as = ACMPQ;
+               p->from.type = D_SP;
+               p->to.type = D_INDIR+D_CX;
+       } else if(framesize <= StackBig) {
+               // large stack: SP-framesize <= stackguard-StackSmall
+               //      LEAQ -xxx(SP), AX
+               //      CMPQ AX, stackguard
+               p = appendp(p);
+               p->as = ALEAQ;
+               p->from.type = D_INDIR+D_SP;
+               p->from.offset = -(framesize-StackSmall);
+               p->to.type = D_AX;
+
+               p = appendp(p);
+               p->as = ACMPQ;
+               p->from.type = D_AX;
+               p->to.type = D_INDIR+D_CX;
+       } else {
+               // Such a large stack we need to protect against wraparound.
+               // If SP is close to zero:
+               //      SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
+               // The +StackGuard on both sides is required to keep the left side positive:
+               // SP is allowed to be slightly below stackguard. See stack.h.
+               //
+               // Preemption sets stackguard to StackPreempt, a very large value.
+               // That breaks the math above, so we have to check for that explicitly.
+               //      MOVQ    stackguard, CX
+               //      CMPQ    CX, $StackPreempt
+               //      JEQ     label-of-call-to-morestack
+               //      LEAQ    StackGuard(SP), AX
+               //      SUBQ    CX, AX
+               //      CMPQ    AX, $(framesize+(StackGuard-StackSmall))
+
+               p = appendp(p);
+               p->as = AMOVQ;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = 0;
+               p->to.type = D_SI;
+
+               p = appendp(p);
+               p->as = ACMPQ;
+               p->from.type = D_SI;
+               p->to.type = D_CONST;
+               p->to.offset = StackPreempt;
+
+               p = appendp(p);
+               p->as = AJEQ;
+               p->to.type = D_BRANCH;
+               q1 = p;
+
+               p = appendp(p);
+               p->as = ALEAQ;
+               p->from.type = D_INDIR+D_SP;
+               p->from.offset = StackGuard;
+               p->to.type = D_AX;
+               
+               p = appendp(p);
+               p->as = ASUBQ;
+               p->from.type = D_SI;
+               p->to.type = D_AX;
+               
+               p = appendp(p);
+               p->as = ACMPQ;
+               p->from.type = D_AX;
+               p->to.type = D_CONST;
+               p->to.offset = framesize+(StackGuard-StackSmall);
+       }                                       
+
+       // common
+       p = appendp(p);
+       p->as = AJHI;
+       p->to.type = D_BRANCH;
+       q = p;
+
+       // If we ask for more stack, we'll get a minimum of StackMin bytes.
+       // We need a stack frame large enough to hold the top-of-stack data,
+       // the function arguments+results, our caller's PC, our frame,
+       // a word for the return PC of the next call, and then the StackLimit bytes
+       // that must be available on entry to any function called from a function
+       // that did a stack check.  If StackMin is enough, don't ask for a specific
+       // amount: then we can use the custom functions and save a few
+       // instructions.
+       moreconst1 = 0;
+       if(StackTop + textarg + PtrSize + framesize + PtrSize + StackLimit >= StackMin)
+               moreconst1 = framesize;
+       moreconst2 = textarg;
+       if(moreconst2 == 1) // special marker
+               moreconst2 = 0;
+       if((moreconst2&7) != 0)
+               diag("misaligned argument size in stack split");
+       // 4 varieties varieties (const1==0 cross const2==0)
+       // and 6 subvarieties of (const1==0 and const2!=0)
+       p = appendp(p);
+       if(moreconst1 == 0 && moreconst2 == 0) {
+               p->as = ACALL;
+               p->to.type = D_BRANCH;
+               p->pcond = pmorestack[0];
+               p->to.sym = symmorestack[0];
+       } else
+       if(moreconst1 != 0 && moreconst2 == 0) {
+               p->as = AMOVL;
+               p->from.type = D_CONST;
+               p->from.offset = moreconst1;
+               p->to.type = D_AX;
+
+               p = appendp(p);
+               p->as = ACALL;
+               p->to.type = D_BRANCH;
+               p->pcond = pmorestack[1];
+               p->to.sym = symmorestack[1];
+       } else
+       if(moreconst1 == 0 && moreconst2 <= 48 && moreconst2%8 == 0) {
+               i = moreconst2/8 + 3;
+               p->as = ACALL;
+               p->to.type = D_BRANCH;
+               p->pcond = pmorestack[i];
+               p->to.sym = symmorestack[i];
+       } else
+       if(moreconst1 == 0 && moreconst2 != 0) {
+               p->as = AMOVL;
+               p->from.type = D_CONST;
+               p->from.offset = moreconst2;
+               p->to.type = D_AX;
+
+               p = appendp(p);
+               p->as = ACALL;
+               p->to.type = D_BRANCH;
+               p->pcond = pmorestack[2];
+               p->to.sym = symmorestack[2];
+       } else {
+               p->as = AMOVQ;
+               p->from.type = D_CONST;
+               p->from.offset = (uint64)moreconst2 << 32;
+               p->from.offset |= moreconst1;
+               p->to.type = D_AX;
+
+               p = appendp(p);
+               p->as = ACALL;
+               p->to.type = D_BRANCH;
+               p->pcond = pmorestack[3];
+               p->to.sym = symmorestack[3];
+       }
+       
+       p = appendp(p);
+       p->as = AJMP;
+       p->to.type = D_BRANCH;
+       p->pcond = cursym->text->link;
+       
+       if(q != P)
+               q->pcond = p->link;
+       if(q1 != P)
+               q1->pcond = q->link;
+
+       *jmpok = q;
+       return p;
+}
+
 vlong
 atolwhex(char *s)
 {
index 41e40b05b42a4c553c606236a016a7e213c5cb1a..2151adf9a83b0115f824495f9d60f16e4a2ceff6 100644 (file)
@@ -405,15 +405,19 @@ brloop(Prog *p)
        return q;
 }
 
+static Prog*   load_g_cx(Prog*);
+static Prog*   stacksplit(Prog*, int32, Prog**);
+
+static Sym *plan9_tos;
+static Prog *pmorestack;
+static Sym *symmorestack;
+
 void
 dostkoff(void)
 {
        Prog *p, *q, *q1;
        int32 autoffset, deltasp;
-       int a, arg;
-       Prog *pmorestack;
-       Sym *symmorestack;
-       Sym *plan9_tos;
+       int a;
 
        pmorestack = P;
        symmorestack = lookup("runtime.morestack", 0);
@@ -440,217 +444,13 @@ dostkoff(void)
 
                q = P;
                q1 = P;
-               if(pmorestack != P)
-               if(!(p->from.scale & NOSPLIT)) {
-                       p = appendp(p); // load g into CX
-                       switch(HEADTYPE) {
-                       case Hwindows:
-                               p->as = AMOVL;
-                               p->from.type = D_INDIR+D_FS;
-                               p->from.offset = 0x14;
-                               p->to.type = D_CX;
-
-                               p = appendp(p);
-                               p->as = AMOVL;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = 0;
-                               p->to.type = D_CX;
-                               break;
-                       
-                       case Hlinux:
-                               if(linkmode != LinkExternal) {
-                                       p->as = AMOVL;
-                                       p->from.type = D_INDIR+D_GS;
-                                       p->from.offset = 0;
-                                       p->to.type = D_CX;
-
-                                       p = appendp(p);
-                                       p->as = AMOVL;
-                                       p->from.type = D_INDIR+D_CX;
-                                       p->from.offset = tlsoffset + 0;
-                                       p->to.type = D_CX;
-                               } else {
-                                       p->as = AMOVL;
-                                       p->from.type = D_INDIR+D_GS;
-                                       p->from.offset = tlsoffset + 0;
-                                       p->to.type = D_CX;
-                                       p->from.index = D_GS;
-                                       p->from.scale = 1;
-                               }
-                               break;
-                       
-                       case Hplan9x32:
-                               p->as = AMOVL;
-                               p->from.type = D_EXTERN;
-                               p->from.sym = plan9_tos;
-                               p->to.type = D_CX;
-                               
-                               p = appendp(p);
-                               p->as = AMOVL;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = tlsoffset + 0;
-                               p->to.type = D_CX;                              
-                               break;
-                       
-                       default:
-                               p->as = AMOVL;
-                               p->from.type = D_INDIR+D_GS;
-                               p->from.offset = tlsoffset + 0;
-                               p->to.type = D_CX;
-                       }
-
-                       if(debug['K']) {
-                               // 8l -K means check not only for stack
-                               // overflow but stack underflow.
-                               // On underflow, INT 3 (breakpoint).
-                               // Underflow itself is rare but this also
-                               // catches out-of-sync stack guard info.
-                               p = appendp(p);
-                               p->as = ACMPL;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = 4;
-                               p->to.type = D_SP;
-
-                               p = appendp(p);
-                               p->as = AJCC;
-                               p->to.type = D_BRANCH;
-                               p->to.offset = 4;
-                               q1 = p;
-
-                               p = appendp(p);
-                               p->as = AINT;
-                               p->from.type = D_CONST;
-                               p->from.offset = 3;
-                               
-                               p = appendp(p);
-                               p->as = ANOP;
-                               q1->pcond = p;
-                       }
-                       q1 = P;
-
-                       if(autoffset <= StackSmall) {
-                               // small stack: SP <= stackguard
-                               //      CMPL SP, stackguard
-                               p = appendp(p);
-                               p->as = ACMPL;
-                               p->from.type = D_SP;
-                               p->to.type = D_INDIR+D_CX;
-                       } else if(autoffset <= StackBig) {
-                               // large stack: SP-framesize <= stackguard-StackSmall
-                               //      LEAL -(autoffset-StackSmall)(SP), AX
-                               //      CMPL AX, stackguard
-                               p = appendp(p);
-                               p->as = ALEAL;
-                               p->from.type = D_INDIR+D_SP;
-                               p->from.offset = -(autoffset-StackSmall);
-                               p->to.type = D_AX;
-
-                               p = appendp(p);
-                               p->as = ACMPL;
-                               p->from.type = D_AX;
-                               p->to.type = D_INDIR+D_CX;
-                       } else {
-                               // Such a large stack we need to protect against wraparound
-                               // if SP is close to zero.
-                               //      SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
-                               // The +StackGuard on both sides is required to keep the left side positive:
-                               // SP is allowed to be slightly below stackguard. See stack.h.
-                               //
-                               // Preemption sets stackguard to StackPreempt, a very large value.
-                               // That breaks the math above, so we have to check for that explicitly.
-                               //      MOVL    stackguard, CX
-                               //      CMPL    CX, $StackPreempt
-                               //      JEQ     label-of-call-to-morestack
-                               //      LEAL    StackGuard(SP), AX
-                               //      SUBL    stackguard, AX
-                               //      CMPL    AX, $(autoffset+(StackGuard-StackSmall))
-                               p = appendp(p);
-                               p->as = AMOVL;
-                               p->from.type = D_INDIR+D_CX;
-                               p->from.offset = 0;
-                               p->to.type = D_SI;
-
-                               p = appendp(p);
-                               p->as = ACMPL;
-                               p->from.type = D_SI;
-                               p->to.type = D_CONST;
-                               p->to.offset = (uint32)StackPreempt;
-
-                               p = appendp(p);
-                               p->as = AJEQ;
-                               p->to.type = D_BRANCH;
-                               q1 = p;
-
-                               p = appendp(p);
-                               p->as = ALEAL;
-                               p->from.type = D_INDIR+D_SP;
-                               p->from.offset = StackGuard;
-                               p->to.type = D_AX;
-                               
-                               p = appendp(p);
-                               p->as = ASUBL;
-                               p->from.type = D_SI;
-                               p->from.offset = 0;
-                               p->to.type = D_AX;
-                               
-                               p = appendp(p);
-                               p->as = ACMPL;
-                               p->from.type = D_AX;
-                               p->to.type = D_CONST;
-                               p->to.offset = autoffset+(StackGuard-StackSmall);
-                       }               
-                                       
-                       // common
-                       p = appendp(p);
-                       p->as = AJHI;
-                       p->to.type = D_BRANCH;
-                       p->to.offset = 4;
-                       q = p;
-
-                       p = appendp(p); // save frame size in DI
-                       p->as = AMOVL;
-                       p->to.type = D_DI;
-                       p->from.type = D_CONST;
-
-                       // If we ask for more stack, we'll get a minimum of StackMin bytes.
-                       // We need a stack frame large enough to hold the top-of-stack data,
-                       // the function arguments+results, our caller's PC, our frame,
-                       // a word for the return PC of the next call, and then the StackLimit bytes
-                       // that must be available on entry to any function called from a function
-                       // that did a stack check.  If StackMin is enough, don't ask for a specific
-                       // amount: then we can use the custom functions and save a few
-                       // instructions.
-                       if(StackTop + cursym->text->to.offset2 + PtrSize + autoffset + PtrSize + StackLimit >= StackMin)
-                               p->from.offset = (autoffset+7) & ~7LL;
-
-                       arg = cursym->text->to.offset2;
-                       if(arg == 1) // special marker for known 0
-                               arg = 0;
-                       if(arg&3)
-                               diag("misaligned argument size in stack split");
-                       p = appendp(p); // save arg size in AX
-                       p->as = AMOVL;
-                       p->to.type = D_AX;
-                       p->from.type = D_CONST;
-                       p->from.offset = arg;
-
-                       p = appendp(p);
-                       p->as = ACALL;
-                       p->to.type = D_BRANCH;
-                       p->pcond = pmorestack;
-                       p->to.sym = symmorestack;
 
+               if(!(p->from.scale & NOSPLIT)) {
                        p = appendp(p);
-                       p->as = AJMP;
-                       p->to.type = D_BRANCH;
-                       p->pcond = cursym->text->link;
+                       p = load_g_cx(p); // load g into CX
+                       p = stacksplit(p, autoffset, &q); // emit split check
                }
 
-               if(q != P)
-                       q->pcond = p->link;
-               if(q1 != P)
-                       q1->pcond = q->link;
-
                if(autoffset) {
                        p = appendp(p);
                        p->as = AADJSP;
@@ -761,6 +561,239 @@ dostkoff(void)
        }
 }
 
+// Append code to p to load g into cx.
+// Overwrites p with the first instruction (no first appendp).
+// Overwriting p is unusual but it lets use this in both the
+// prologue (caller must call appendp first) and in the epilogue.
+// Returns last new instruction.
+static Prog*
+load_g_cx(Prog *p)
+{
+       switch(HEADTYPE) {
+       case Hwindows:
+               p->as = AMOVL;
+               p->from.type = D_INDIR+D_FS;
+               p->from.offset = 0x14;
+               p->to.type = D_CX;
+
+               p = appendp(p);
+               p->as = AMOVL;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = 0;
+               p->to.type = D_CX;
+               break;
+       
+       case Hlinux:
+               if(linkmode != LinkExternal) {
+                       p->as = AMOVL;
+                       p->from.type = D_INDIR+D_GS;
+                       p->from.offset = 0;
+                       p->to.type = D_CX;
+
+                       p = appendp(p);
+                       p->as = AMOVL;
+                       p->from.type = D_INDIR+D_CX;
+                       p->from.offset = tlsoffset + 0;
+                       p->to.type = D_CX;
+               } else {
+                       p->as = AMOVL;
+                       p->from.type = D_INDIR+D_GS;
+                       p->from.offset = tlsoffset + 0;
+                       p->to.type = D_CX;
+                       p->from.index = D_GS;
+                       p->from.scale = 1;
+               }
+               break;
+       
+       case Hplan9x32:
+               p->as = AMOVL;
+               p->from.type = D_EXTERN;
+               p->from.sym = plan9_tos;
+               p->to.type = D_CX;
+               
+               p = appendp(p);
+               p->as = AMOVL;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = tlsoffset + 0;
+               p->to.type = D_CX;                              
+               break;
+       
+       default:
+               p->as = AMOVL;
+               p->from.type = D_INDIR+D_GS;
+               p->from.offset = tlsoffset + 0;
+               p->to.type = D_CX;
+       }
+       return p;
+}
+
+// Append code to p to check for stack split.
+// Appends to (does not overwrite) p.
+// Assumes g is in CX.
+// Returns last new instruction.
+// On return, *jmpok is the instruction that should jump
+// to the stack frame allocation if no split is needed.
+static Prog*
+stacksplit(Prog *p, int32 framesize, Prog **jmpok)
+{
+       Prog *q, *q1;
+       int arg;
+
+       if(debug['K']) {
+               // 8l -K means check not only for stack
+               // overflow but stack underflow.
+               // On underflow, INT 3 (breakpoint).
+               // Underflow itself is rare but this also
+               // catches out-of-sync stack guard info.
+               p = appendp(p);
+               p->as = ACMPL;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = 4;
+               p->to.type = D_SP;
+
+               p = appendp(p);
+               p->as = AJCC;
+               p->to.type = D_BRANCH;
+               p->to.offset = 4;
+               q1 = p;
+
+               p = appendp(p);
+               p->as = AINT;
+               p->from.type = D_CONST;
+               p->from.offset = 3;
+               
+               p = appendp(p);
+               p->as = ANOP;
+               q1->pcond = p;
+       }
+       q1 = P;
+
+       if(framesize <= StackSmall) {
+               // small stack: SP <= stackguard
+               //      CMPL SP, stackguard
+               p = appendp(p);
+               p->as = ACMPL;
+               p->from.type = D_SP;
+               p->to.type = D_INDIR+D_CX;
+       } else if(framesize <= StackBig) {
+               // large stack: SP-framesize <= stackguard-StackSmall
+               //      LEAL -(framesize-StackSmall)(SP), AX
+               //      CMPL AX, stackguard
+               p = appendp(p);
+               p->as = ALEAL;
+               p->from.type = D_INDIR+D_SP;
+               p->from.offset = -(framesize-StackSmall);
+               p->to.type = D_AX;
+
+               p = appendp(p);
+               p->as = ACMPL;
+               p->from.type = D_AX;
+               p->to.type = D_INDIR+D_CX;
+       } else {
+               // Such a large stack we need to protect against wraparound
+               // if SP is close to zero.
+               //      SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
+               // The +StackGuard on both sides is required to keep the left side positive:
+               // SP is allowed to be slightly below stackguard. See stack.h.
+               //
+               // Preemption sets stackguard to StackPreempt, a very large value.
+               // That breaks the math above, so we have to check for that explicitly.
+               //      MOVL    stackguard, CX
+               //      CMPL    CX, $StackPreempt
+               //      JEQ     label-of-call-to-morestack
+               //      LEAL    StackGuard(SP), AX
+               //      SUBL    stackguard, AX
+               //      CMPL    AX, $(framesize+(StackGuard-StackSmall))
+               p = appendp(p);
+               p->as = AMOVL;
+               p->from.type = D_INDIR+D_CX;
+               p->from.offset = 0;
+               p->to.type = D_SI;
+
+               p = appendp(p);
+               p->as = ACMPL;
+               p->from.type = D_SI;
+               p->to.type = D_CONST;
+               p->to.offset = (uint32)StackPreempt;
+
+               p = appendp(p);
+               p->as = AJEQ;
+               p->to.type = D_BRANCH;
+               q1 = p;
+
+               p = appendp(p);
+               p->as = ALEAL;
+               p->from.type = D_INDIR+D_SP;
+               p->from.offset = StackGuard;
+               p->to.type = D_AX;
+               
+               p = appendp(p);
+               p->as = ASUBL;
+               p->from.type = D_SI;
+               p->from.offset = 0;
+               p->to.type = D_AX;
+               
+               p = appendp(p);
+               p->as = ACMPL;
+               p->from.type = D_AX;
+               p->to.type = D_CONST;
+               p->to.offset = framesize+(StackGuard-StackSmall);
+       }               
+                       
+       // common
+       p = appendp(p);
+       p->as = AJHI;
+       p->to.type = D_BRANCH;
+       p->to.offset = 4;
+       q = p;
+
+       p = appendp(p); // save frame size in DI
+       p->as = AMOVL;
+       p->to.type = D_DI;
+       p->from.type = D_CONST;
+
+       // If we ask for more stack, we'll get a minimum of StackMin bytes.
+       // We need a stack frame large enough to hold the top-of-stack data,
+       // the function arguments+results, our caller's PC, our frame,
+       // a word for the return PC of the next call, and then the StackLimit bytes
+       // that must be available on entry to any function called from a function
+       // that did a stack check.  If StackMin is enough, don't ask for a specific
+       // amount: then we can use the custom functions and save a few
+       // instructions.
+       if(StackTop + cursym->text->to.offset2 + PtrSize + framesize + PtrSize + StackLimit >= StackMin)
+               p->from.offset = (framesize+7) & ~7LL;
+
+       arg = cursym->text->to.offset2;
+       if(arg == 1) // special marker for known 0
+               arg = 0;
+       if(arg&3)
+               diag("misaligned argument size in stack split");
+       p = appendp(p); // save arg size in AX
+       p->as = AMOVL;
+       p->to.type = D_AX;
+       p->from.type = D_CONST;
+       p->from.offset = arg;
+
+       p = appendp(p);
+       p->as = ACALL;
+       p->to.type = D_BRANCH;
+       p->pcond = pmorestack;
+       p->to.sym = symmorestack;
+
+       p = appendp(p);
+       p->as = AJMP;
+       p->to.type = D_BRANCH;
+       p->pcond = cursym->text->link;
+
+       if(q != P)
+               q->pcond = p->link;
+       if(q1 != P)
+               q1->pcond = q->link;
+       
+       *jmpok = q;
+       return p;
+}
+
 int32
 atolwhex(char *s)
 {