From 1a6576db3468566ec89671c2c191e0b975833a7f Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Wed, 11 Sep 2013 20:29:45 -0400 Subject: [PATCH] cmd/5l, cmd/6l, cmd/8l: refactor stack split code Pull the stack split generation into its own function. This will make an upcoming change to fix recover easier to digest. R=ken2 CC=golang-dev https://golang.org/cl/13611044 --- src/cmd/5l/noop.c | 277 +++++++++++++------------- src/cmd/6l/pass.c | 493 ++++++++++++++++++++++++---------------------- src/cmd/8l/pass.c | 455 ++++++++++++++++++++++-------------------- 3 files changed, 650 insertions(+), 575 deletions(-) diff --git a/src/cmd/5l/noop.c b/src/cmd/5l/noop.c index 44f4c22cf2..7b63aa7156 100644 --- a/src/cmd/5l/noop.c +++ b/src/cmd/5l/noop.c @@ -38,6 +38,10 @@ static Sym* sym_div; static Sym* sym_divu; static Sym* sym_mod; static Sym* sym_modu; +static Sym* symmorestack; +static Prog* pmorestack; + +static Prog* stacksplit(Prog*, int32); static void linkcase(Prog *casep) @@ -58,9 +62,7 @@ noops(void) { Prog *p, *q, *q1, *q2; int o; - int32 arg; - Prog *pmorestack; - Sym *symmorestack, *tlsfallback, *gmsym; + Sym *tlsfallback, *gmsym; /* * find leaf subroutines @@ -256,136 +258,8 @@ noops(void) break; } - if(!(p->reg & NOSPLIT)) { - // MOVW g_stackguard(g), R1 - p = appendp(p); - p->as = AMOVW; - p->from.type = D_OREG; - p->from.reg = REGG; - p->to.type = D_REG; - p->to.reg = 1; - - if(autosize <= StackSmall) { - // small stack: SP < stackguard - // CMP stackguard, SP - p = appendp(p); - p->as = ACMP; - p->from.type = D_REG; - p->from.reg = 1; - p->reg = REGSP; - } else if(autosize <= StackBig) { - // large stack: SP-framesize < stackguard-StackSmall - // MOVW $-autosize(SP), R2 - // CMP stackguard, R2 - p = appendp(p); - p->as = AMOVW; - p->from.type = D_CONST; - p->from.reg = REGSP; - p->from.offset = -autosize; - p->to.type = D_REG; - p->to.reg = 2; - - p = appendp(p); - p->as = ACMP; - p->from.type = D_REG; - p->from.reg = 1; - p->reg = 2; - } else { - // Such a large stack we need to protect against wraparound - // if SP is close to zero. - // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall) - // The +StackGuard on both sides is required to keep the left side positive: - // SP is allowed to be slightly below stackguard. See stack.h. - // CMP $StackPreempt, R1 - // MOVW.NE $StackGuard(SP), R2 - // SUB.NE R1, R2 - // MOVW.NE $(autosize+(StackGuard-StackSmall)), R3 - // CMP.NE R3, R2 - p = appendp(p); - p->as = ACMP; - p->from.type = D_CONST; - p->from.offset = (uint32)StackPreempt; - p->reg = 1; - - p = appendp(p); - p->as = AMOVW; - p->from.type = D_CONST; - p->from.reg = REGSP; - p->from.offset = StackGuard; - p->to.type = D_REG; - p->to.reg = 2; - p->scond = C_SCOND_NE; - - p = appendp(p); - p->as = ASUB; - p->from.type = D_REG; - p->from.reg = 1; - p->to.type = D_REG; - p->to.reg = 2; - p->scond = C_SCOND_NE; - - p = appendp(p); - p->as = AMOVW; - p->from.type = D_CONST; - p->from.offset = autosize + (StackGuard - StackSmall); - p->to.type = D_REG; - p->to.reg = 3; - p->scond = C_SCOND_NE; - - p = appendp(p); - p->as = ACMP; - p->from.type = D_REG; - p->from.reg = 3; - p->reg = 2; - p->scond = C_SCOND_NE; - } - - // MOVW.LS $autosize, R1 - p = appendp(p); - p->as = AMOVW; - p->scond = C_SCOND_LS; - p->from.type = D_CONST; - p->from.offset = autosize; - p->to.type = D_REG; - p->to.reg = 1; - - // MOVW.LS $args, R2 - p = appendp(p); - p->as = AMOVW; - p->scond = C_SCOND_LS; - p->from.type = D_CONST; - arg = cursym->text->to.offset2; - if(arg == 1) // special marker for known 0 - arg = 0; - if(arg&3) - diag("misaligned argument size in stack split"); - p->from.offset = arg; - p->to.type = D_REG; - p->to.reg = 2; - - // MOVW.LS R14, R3 - p = appendp(p); - p->as = AMOVW; - p->scond = C_SCOND_LS; - p->from.type = D_REG; - p->from.reg = REGLINK; - p->to.type = D_REG; - p->to.reg = 3; - - // BL.LS runtime.morestack(SB) // modifies LR, returns with LO still asserted - p = appendp(p); - p->as = ABL; - p->scond = C_SCOND_LS; - p->to.type = D_BRANCH; - p->to.sym = symmorestack; - p->cond = pmorestack; - - // BLS start - p = appendp(p); - p->as = ABLS; - p->to.type = D_BRANCH; - p->cond = cursym->text->link; - } + if(!(p->reg & NOSPLIT)) + p = stacksplit(p, autosize); // emit split check // MOVW.W R14,$-autosize(SP) p = appendp(p); @@ -554,6 +428,143 @@ noops(void) } } +static Prog* +stacksplit(Prog *p, int32 framesize) +{ + int32 arg; + + // MOVW g_stackguard(g), R1 + p = appendp(p); + p->as = AMOVW; + p->from.type = D_OREG; + p->from.reg = REGG; + p->to.type = D_REG; + p->to.reg = 1; + + if(framesize <= StackSmall) { + // small stack: SP < stackguard + // CMP stackguard, SP + p = appendp(p); + p->as = ACMP; + p->from.type = D_REG; + p->from.reg = 1; + p->reg = REGSP; + } else if(framesize <= StackBig) { + // large stack: SP-framesize < stackguard-StackSmall + // MOVW $-framesize(SP), R2 + // CMP stackguard, R2 + p = appendp(p); + p->as = AMOVW; + p->from.type = D_CONST; + p->from.reg = REGSP; + p->from.offset = -framesize; + p->to.type = D_REG; + p->to.reg = 2; + + p = appendp(p); + p->as = ACMP; + p->from.type = D_REG; + p->from.reg = 1; + p->reg = 2; + } else { + // Such a large stack we need to protect against wraparound + // if SP is close to zero. + // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall) + // The +StackGuard on both sides is required to keep the left side positive: + // SP is allowed to be slightly below stackguard. See stack.h. + // CMP $StackPreempt, R1 + // MOVW.NE $StackGuard(SP), R2 + // SUB.NE R1, R2 + // MOVW.NE $(framesize+(StackGuard-StackSmall)), R3 + // CMP.NE R3, R2 + p = appendp(p); + p->as = ACMP; + p->from.type = D_CONST; + p->from.offset = (uint32)StackPreempt; + p->reg = 1; + + p = appendp(p); + p->as = AMOVW; + p->from.type = D_CONST; + p->from.reg = REGSP; + p->from.offset = StackGuard; + p->to.type = D_REG; + p->to.reg = 2; + p->scond = C_SCOND_NE; + + p = appendp(p); + p->as = ASUB; + p->from.type = D_REG; + p->from.reg = 1; + p->to.type = D_REG; + p->to.reg = 2; + p->scond = C_SCOND_NE; + + p = appendp(p); + p->as = AMOVW; + p->from.type = D_CONST; + p->from.offset = framesize + (StackGuard - StackSmall); + p->to.type = D_REG; + p->to.reg = 3; + p->scond = C_SCOND_NE; + + p = appendp(p); + p->as = ACMP; + p->from.type = D_REG; + p->from.reg = 3; + p->reg = 2; + p->scond = C_SCOND_NE; + } + + // MOVW.LS $framesize, R1 + p = appendp(p); + p->as = AMOVW; + p->scond = C_SCOND_LS; + p->from.type = D_CONST; + p->from.offset = framesize; + p->to.type = D_REG; + p->to.reg = 1; + + // MOVW.LS $args, R2 + p = appendp(p); + p->as = AMOVW; + p->scond = C_SCOND_LS; + p->from.type = D_CONST; + arg = cursym->text->to.offset2; + if(arg == 1) // special marker for known 0 + arg = 0; + if(arg&3) + diag("misaligned argument size in stack split"); + p->from.offset = arg; + p->to.type = D_REG; + p->to.reg = 2; + + // MOVW.LS R14, R3 + p = appendp(p); + p->as = AMOVW; + p->scond = C_SCOND_LS; + p->from.type = D_REG; + p->from.reg = REGLINK; + p->to.type = D_REG; + p->to.reg = 3; + + // BL.LS runtime.morestack(SB) // modifies LR, returns with LO still asserted + p = appendp(p); + p->as = ABL; + p->scond = C_SCOND_LS; + p->to.type = D_BRANCH; + p->to.sym = symmorestack; + p->cond = pmorestack; + + // BLS start + p = appendp(p); + p->as = ABLS; + p->to.type = D_BRANCH; + p->cond = cursym->text->link; + + return p; +} + static void sigdiv(char *n) { diff --git a/src/cmd/6l/pass.c b/src/cmd/6l/pass.c index be1bc4f070..d24672432f 100644 --- a/src/cmd/6l/pass.c +++ b/src/cmd/6l/pass.c @@ -271,8 +271,9 @@ patch(void) { int32 c; Prog *p, *q; - Sym *s, *gmsym; + Sym *s; int32 vexit; + Sym *gmsym; if(debug['v']) Bprint(&bso, "%5.2f mkfwd\n", cputime()); @@ -467,6 +468,10 @@ morename[] = }; Prog* pmorestack[nelem(morename)]; Sym* symmorestack[nelem(morename)]; +Sym* gmsym; + +static Prog* load_g_cx(Prog*); +static Prog* stacksplit(Prog*, int32, Prog**); void dostkoff(void) @@ -474,9 +479,7 @@ dostkoff(void) Prog *p, *q, *q1; int32 autoffset, deltasp; int a, pcsize; - uint32 moreconst1, moreconst2, i; - Sym *gmsym; - + uint32 i; gmsym = lookup("runtime.tlsgm", 0); for(i=0; ifrom.scale & NOSPLIT) && autoffset >= StackSmall) diag("nosplit func likely to overflow stack"); + q = P; if(!(p->from.scale & NOSPLIT)) { - if(flag_shared) { - // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX - p = appendp(p); - p->as = AMOVQ; - p->from.type = D_EXTERN; - p->from.sym = gmsym; - p->to.type = D_CX; - } - p = appendp(p); // load g into CX - p->as = AMOVQ; - if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd - || HEADTYPE == Hopenbsd || HEADTYPE == Hnetbsd - || HEADTYPE == Hplan9x64 || HEADTYPE == Hdragonfly) - // ELF uses FS - p->from.type = D_INDIR+D_FS; - else - p->from.type = D_INDIR+D_GS; - if(flag_shared) { - // Add TLS offset stored in CX - p->from.index = p->from.type - D_INDIR; - p->from.type = D_INDIR + D_CX; - } - p->from.offset = tlsoffset+0; - p->to.type = D_CX; - if(HEADTYPE == Hwindows) { - // movq %gs:0x28, %rcx - // movq (%rcx), %rcx - p->as = AMOVQ; - p->from.type = D_INDIR+D_GS; - p->from.offset = 0x28; - p->to.type = D_CX; - - - p = appendp(p); - p->as = AMOVQ; - p->from.type = D_INDIR+D_CX; - p->from.offset = 0; - p->to.type = D_CX; - } - - if(debug['K']) { - // 6l -K means check not only for stack - // overflow but stack underflow. - // On underflow, INT 3 (breakpoint). - // Underflow itself is rare but this also - // catches out-of-sync stack guard info - - p = appendp(p); - p->as = ACMPQ; - p->from.type = D_INDIR+D_CX; - p->from.offset = 8; - p->to.type = D_SP; - - p = appendp(p); - p->as = AJHI; - p->to.type = D_BRANCH; - p->to.offset = 4; - q1 = p; - - p = appendp(p); - p->as = AINT; - p->from.type = D_CONST; - p->from.offset = 3; - - p = appendp(p); - p->as = ANOP; - q1->pcond = p; - } - - q1 = P; - if(autoffset <= StackSmall) { - // small stack: SP <= stackguard - // CMPQ SP, stackguard - p = appendp(p); - p->as = ACMPQ; - p->from.type = D_SP; - p->to.type = D_INDIR+D_CX; - } else if(autoffset <= StackBig) { - // large stack: SP-framesize <= stackguard-StackSmall - // LEAQ -xxx(SP), AX - // CMPQ AX, stackguard - p = appendp(p); - p->as = ALEAQ; - p->from.type = D_INDIR+D_SP; - p->from.offset = -(autoffset-StackSmall); - p->to.type = D_AX; - - p = appendp(p); - p->as = ACMPQ; - p->from.type = D_AX; - p->to.type = D_INDIR+D_CX; - } else { - // Such a large stack we need to protect against wraparound. - // If SP is close to zero: - // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) - // The +StackGuard on both sides is required to keep the left side positive: - // SP is allowed to be slightly below stackguard. See stack.h. - // - // Preemption sets stackguard to StackPreempt, a very large value. - // That breaks the math above, so we have to check for that explicitly. - // MOVQ stackguard, CX - // CMPQ CX, $StackPreempt - // JEQ label-of-call-to-morestack - // LEAQ StackGuard(SP), AX - // SUBQ CX, AX - // CMPQ AX, $(autoffset+(StackGuard-StackSmall)) - - p = appendp(p); - p->as = AMOVQ; - p->from.type = D_INDIR+D_CX; - p->from.offset = 0; - p->to.type = D_SI; - - p = appendp(p); - p->as = ACMPQ; - p->from.type = D_SI; - p->to.type = D_CONST; - p->to.offset = StackPreempt; - - p = appendp(p); - p->as = AJEQ; - p->to.type = D_BRANCH; - q1 = p; - - p = appendp(p); - p->as = ALEAQ; - p->from.type = D_INDIR+D_SP; - p->from.offset = StackGuard; - p->to.type = D_AX; - - p = appendp(p); - p->as = ASUBQ; - p->from.type = D_SI; - p->to.type = D_AX; - - p = appendp(p); - p->as = ACMPQ; - p->from.type = D_AX; - p->to.type = D_CONST; - p->to.offset = autoffset+(StackGuard-StackSmall); - } - - // common - p = appendp(p); - p->as = AJHI; - p->to.type = D_BRANCH; - q = p; - - // If we ask for more stack, we'll get a minimum of StackMin bytes. - // We need a stack frame large enough to hold the top-of-stack data, - // the function arguments+results, our caller's PC, our frame, - // a word for the return PC of the next call, and then the StackLimit bytes - // that must be available on entry to any function called from a function - // that did a stack check. If StackMin is enough, don't ask for a specific - // amount: then we can use the custom functions and save a few - // instructions. - moreconst1 = 0; - if(StackTop + textarg + PtrSize + autoffset + PtrSize + StackLimit >= StackMin) - moreconst1 = autoffset; - moreconst2 = textarg; - if(moreconst2 == 1) // special marker - moreconst2 = 0; - if((moreconst2&7) != 0) - diag("misaligned argument size in stack split"); - // 4 varieties varieties (const1==0 cross const2==0) - // and 6 subvarieties of (const1==0 and const2!=0) p = appendp(p); - if(moreconst1 == 0 && moreconst2 == 0) { - p->as = ACALL; - p->to.type = D_BRANCH; - p->pcond = pmorestack[0]; - p->to.sym = symmorestack[0]; - } else - if(moreconst1 != 0 && moreconst2 == 0) { - p->as = AMOVL; - p->from.type = D_CONST; - p->from.offset = moreconst1; - p->to.type = D_AX; - - p = appendp(p); - p->as = ACALL; - p->to.type = D_BRANCH; - p->pcond = pmorestack[1]; - p->to.sym = symmorestack[1]; - } else - if(moreconst1 == 0 && moreconst2 <= 48 && moreconst2%8 == 0) { - i = moreconst2/8 + 3; - p->as = ACALL; - p->to.type = D_BRANCH; - p->pcond = pmorestack[i]; - p->to.sym = symmorestack[i]; - } else - if(moreconst1 == 0 && moreconst2 != 0) { - p->as = AMOVL; - p->from.type = D_CONST; - p->from.offset = moreconst2; - p->to.type = D_AX; - - p = appendp(p); - p->as = ACALL; - p->to.type = D_BRANCH; - p->pcond = pmorestack[2]; - p->to.sym = symmorestack[2]; - } else { - p->as = AMOVQ; - p->from.type = D_CONST; - p->from.offset = (uint64)moreconst2 << 32; - p->from.offset |= moreconst1; - p->to.type = D_AX; - - p = appendp(p); - p->as = ACALL; - p->to.type = D_BRANCH; - p->pcond = pmorestack[3]; - p->to.sym = symmorestack[3]; - } - - p = appendp(p); - p->as = AJMP; - p->to.type = D_BRANCH; - p->pcond = cursym->text->link; + p = load_g_cx(p); // load g into CX + p = stacksplit(p, autoffset, &q); // emit split check } - if(q != P) - q->pcond = p->link; - if(q1 != P) - q1->pcond = q->link; - if(autoffset) { p = appendp(p); p->as = AADJSP; @@ -895,6 +674,258 @@ dostkoff(void) } } +// Append code to p to load g into cx. +// Overwrites p with the first instruction (no first appendp). +// Overwriting p is unusual but it lets use this in both the +// prologue (caller must call appendp first) and in the epilogue. +// Returns last new instruction. +static Prog* +load_g_cx(Prog *p) +{ + if(flag_shared) { + // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX + p->as = AMOVQ; + p->from.type = D_EXTERN; + p->from.sym = gmsym; + p->to.type = D_CX; + p = appendp(p); + } + p->as = AMOVQ; + if(HEADTYPE == Hlinux || HEADTYPE == Hfreebsd + || HEADTYPE == Hopenbsd || HEADTYPE == Hnetbsd + || HEADTYPE == Hplan9x64 || HEADTYPE == Hdragonfly) + // ELF uses FS + p->from.type = D_INDIR+D_FS; + else + p->from.type = D_INDIR+D_GS; + if(flag_shared) { + // Add TLS offset stored in CX + p->from.index = p->from.type - D_INDIR; + p->from.type = D_INDIR + D_CX; + } + p->from.offset = tlsoffset+0; + p->to.type = D_CX; + if(HEADTYPE == Hwindows) { + // movq %gs:0x28, %rcx + // movq (%rcx), %rcx + p->as = AMOVQ; + p->from.type = D_INDIR+D_GS; + p->from.offset = 0x28; + p->to.type = D_CX; + + p = appendp(p); + p->as = AMOVQ; + p->from.type = D_INDIR+D_CX; + p->from.offset = 0; + p->to.type = D_CX; + } + return p; +} + +// Append code to p to check for stack split. +// Appends to (does not overwrite) p. +// Assumes g is in CX. +// Returns last new instruction. +// On return, *jmpok is the instruction that should jump +// to the stack frame allocation if no split is needed. +static Prog* +stacksplit(Prog *p, int32 framesize, Prog **jmpok) +{ + Prog *q, *q1; + uint32 moreconst1, moreconst2, i; + + if(debug['K']) { + // 6l -K means check not only for stack + // overflow but stack underflow. + // On underflow, INT 3 (breakpoint). + // Underflow itself is rare but this also + // catches out-of-sync stack guard info + + p = appendp(p); + p->as = ACMPQ; + p->from.type = D_INDIR+D_CX; + p->from.offset = 8; + p->to.type = D_SP; + + p = appendp(p); + p->as = AJHI; + p->to.type = D_BRANCH; + p->to.offset = 4; + q1 = p; + + p = appendp(p); + p->as = AINT; + p->from.type = D_CONST; + p->from.offset = 3; + + p = appendp(p); + p->as = ANOP; + q1->pcond = p; + } + + q = P; + q1 = P; + if(framesize <= StackSmall) { + // small stack: SP <= stackguard + // CMPQ SP, stackguard + p = appendp(p); + p->as = ACMPQ; + p->from.type = D_SP; + p->to.type = D_INDIR+D_CX; + } else if(framesize <= StackBig) { + // large stack: SP-framesize <= stackguard-StackSmall + // LEAQ -xxx(SP), AX + // CMPQ AX, stackguard + p = appendp(p); + p->as = ALEAQ; + p->from.type = D_INDIR+D_SP; + p->from.offset = -(framesize-StackSmall); + p->to.type = D_AX; + + p = appendp(p); + p->as = ACMPQ; + p->from.type = D_AX; + p->to.type = D_INDIR+D_CX; + } else { + // Such a large stack we need to protect against wraparound. + // If SP is close to zero: + // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) + // The +StackGuard on both sides is required to keep the left side positive: + // SP is allowed to be slightly below stackguard. See stack.h. + // + // Preemption sets stackguard to StackPreempt, a very large value. + // That breaks the math above, so we have to check for that explicitly. + // MOVQ stackguard, CX + // CMPQ CX, $StackPreempt + // JEQ label-of-call-to-morestack + // LEAQ StackGuard(SP), AX + // SUBQ CX, AX + // CMPQ AX, $(framesize+(StackGuard-StackSmall)) + + p = appendp(p); + p->as = AMOVQ; + p->from.type = D_INDIR+D_CX; + p->from.offset = 0; + p->to.type = D_SI; + + p = appendp(p); + p->as = ACMPQ; + p->from.type = D_SI; + p->to.type = D_CONST; + p->to.offset = StackPreempt; + + p = appendp(p); + p->as = AJEQ; + p->to.type = D_BRANCH; + q1 = p; + + p = appendp(p); + p->as = ALEAQ; + p->from.type = D_INDIR+D_SP; + p->from.offset = StackGuard; + p->to.type = D_AX; + + p = appendp(p); + p->as = ASUBQ; + p->from.type = D_SI; + p->to.type = D_AX; + + p = appendp(p); + p->as = ACMPQ; + p->from.type = D_AX; + p->to.type = D_CONST; + p->to.offset = framesize+(StackGuard-StackSmall); + } + + // common + p = appendp(p); + p->as = AJHI; + p->to.type = D_BRANCH; + q = p; + + // If we ask for more stack, we'll get a minimum of StackMin bytes. + // We need a stack frame large enough to hold the top-of-stack data, + // the function arguments+results, our caller's PC, our frame, + // a word for the return PC of the next call, and then the StackLimit bytes + // that must be available on entry to any function called from a function + // that did a stack check. If StackMin is enough, don't ask for a specific + // amount: then we can use the custom functions and save a few + // instructions. + moreconst1 = 0; + if(StackTop + textarg + PtrSize + framesize + PtrSize + StackLimit >= StackMin) + moreconst1 = framesize; + moreconst2 = textarg; + if(moreconst2 == 1) // special marker + moreconst2 = 0; + if((moreconst2&7) != 0) + diag("misaligned argument size in stack split"); + // 4 varieties varieties (const1==0 cross const2==0) + // and 6 subvarieties of (const1==0 and const2!=0) + p = appendp(p); + if(moreconst1 == 0 && moreconst2 == 0) { + p->as = ACALL; + p->to.type = D_BRANCH; + p->pcond = pmorestack[0]; + p->to.sym = symmorestack[0]; + } else + if(moreconst1 != 0 && moreconst2 == 0) { + p->as = AMOVL; + p->from.type = D_CONST; + p->from.offset = moreconst1; + p->to.type = D_AX; + + p = appendp(p); + p->as = ACALL; + p->to.type = D_BRANCH; + p->pcond = pmorestack[1]; + p->to.sym = symmorestack[1]; + } else + if(moreconst1 == 0 && moreconst2 <= 48 && moreconst2%8 == 0) { + i = moreconst2/8 + 3; + p->as = ACALL; + p->to.type = D_BRANCH; + p->pcond = pmorestack[i]; + p->to.sym = symmorestack[i]; + } else + if(moreconst1 == 0 && moreconst2 != 0) { + p->as = AMOVL; + p->from.type = D_CONST; + p->from.offset = moreconst2; + p->to.type = D_AX; + + p = appendp(p); + p->as = ACALL; + p->to.type = D_BRANCH; + p->pcond = pmorestack[2]; + p->to.sym = symmorestack[2]; + } else { + p->as = AMOVQ; + p->from.type = D_CONST; + p->from.offset = (uint64)moreconst2 << 32; + p->from.offset |= moreconst1; + p->to.type = D_AX; + + p = appendp(p); + p->as = ACALL; + p->to.type = D_BRANCH; + p->pcond = pmorestack[3]; + p->to.sym = symmorestack[3]; + } + + p = appendp(p); + p->as = AJMP; + p->to.type = D_BRANCH; + p->pcond = cursym->text->link; + + if(q != P) + q->pcond = p->link; + if(q1 != P) + q1->pcond = q->link; + + *jmpok = q; + return p; +} + vlong atolwhex(char *s) { diff --git a/src/cmd/8l/pass.c b/src/cmd/8l/pass.c index 41e40b05b4..2151adf9a8 100644 --- a/src/cmd/8l/pass.c +++ b/src/cmd/8l/pass.c @@ -405,15 +405,19 @@ brloop(Prog *p) return q; } +static Prog* load_g_cx(Prog*); +static Prog* stacksplit(Prog*, int32, Prog**); + +static Sym *plan9_tos; +static Prog *pmorestack; +static Sym *symmorestack; + void dostkoff(void) { Prog *p, *q, *q1; int32 autoffset, deltasp; - int a, arg; - Prog *pmorestack; - Sym *symmorestack; - Sym *plan9_tos; + int a; pmorestack = P; symmorestack = lookup("runtime.morestack", 0); @@ -440,217 +444,13 @@ dostkoff(void) q = P; q1 = P; - if(pmorestack != P) - if(!(p->from.scale & NOSPLIT)) { - p = appendp(p); // load g into CX - switch(HEADTYPE) { - case Hwindows: - p->as = AMOVL; - p->from.type = D_INDIR+D_FS; - p->from.offset = 0x14; - p->to.type = D_CX; - - p = appendp(p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = 0; - p->to.type = D_CX; - break; - - case Hlinux: - if(linkmode != LinkExternal) { - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = 0; - p->to.type = D_CX; - - p = appendp(p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = tlsoffset + 0; - p->to.type = D_CX; - } else { - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = tlsoffset + 0; - p->to.type = D_CX; - p->from.index = D_GS; - p->from.scale = 1; - } - break; - - case Hplan9x32: - p->as = AMOVL; - p->from.type = D_EXTERN; - p->from.sym = plan9_tos; - p->to.type = D_CX; - - p = appendp(p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = tlsoffset + 0; - p->to.type = D_CX; - break; - - default: - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = tlsoffset + 0; - p->to.type = D_CX; - } - - if(debug['K']) { - // 8l -K means check not only for stack - // overflow but stack underflow. - // On underflow, INT 3 (breakpoint). - // Underflow itself is rare but this also - // catches out-of-sync stack guard info. - p = appendp(p); - p->as = ACMPL; - p->from.type = D_INDIR+D_CX; - p->from.offset = 4; - p->to.type = D_SP; - - p = appendp(p); - p->as = AJCC; - p->to.type = D_BRANCH; - p->to.offset = 4; - q1 = p; - - p = appendp(p); - p->as = AINT; - p->from.type = D_CONST; - p->from.offset = 3; - - p = appendp(p); - p->as = ANOP; - q1->pcond = p; - } - q1 = P; - - if(autoffset <= StackSmall) { - // small stack: SP <= stackguard - // CMPL SP, stackguard - p = appendp(p); - p->as = ACMPL; - p->from.type = D_SP; - p->to.type = D_INDIR+D_CX; - } else if(autoffset <= StackBig) { - // large stack: SP-framesize <= stackguard-StackSmall - // LEAL -(autoffset-StackSmall)(SP), AX - // CMPL AX, stackguard - p = appendp(p); - p->as = ALEAL; - p->from.type = D_INDIR+D_SP; - p->from.offset = -(autoffset-StackSmall); - p->to.type = D_AX; - - p = appendp(p); - p->as = ACMPL; - p->from.type = D_AX; - p->to.type = D_INDIR+D_CX; - } else { - // Such a large stack we need to protect against wraparound - // if SP is close to zero. - // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) - // The +StackGuard on both sides is required to keep the left side positive: - // SP is allowed to be slightly below stackguard. See stack.h. - // - // Preemption sets stackguard to StackPreempt, a very large value. - // That breaks the math above, so we have to check for that explicitly. - // MOVL stackguard, CX - // CMPL CX, $StackPreempt - // JEQ label-of-call-to-morestack - // LEAL StackGuard(SP), AX - // SUBL stackguard, AX - // CMPL AX, $(autoffset+(StackGuard-StackSmall)) - p = appendp(p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = 0; - p->to.type = D_SI; - - p = appendp(p); - p->as = ACMPL; - p->from.type = D_SI; - p->to.type = D_CONST; - p->to.offset = (uint32)StackPreempt; - - p = appendp(p); - p->as = AJEQ; - p->to.type = D_BRANCH; - q1 = p; - - p = appendp(p); - p->as = ALEAL; - p->from.type = D_INDIR+D_SP; - p->from.offset = StackGuard; - p->to.type = D_AX; - - p = appendp(p); - p->as = ASUBL; - p->from.type = D_SI; - p->from.offset = 0; - p->to.type = D_AX; - - p = appendp(p); - p->as = ACMPL; - p->from.type = D_AX; - p->to.type = D_CONST; - p->to.offset = autoffset+(StackGuard-StackSmall); - } - - // common - p = appendp(p); - p->as = AJHI; - p->to.type = D_BRANCH; - p->to.offset = 4; - q = p; - - p = appendp(p); // save frame size in DI - p->as = AMOVL; - p->to.type = D_DI; - p->from.type = D_CONST; - - // If we ask for more stack, we'll get a minimum of StackMin bytes. - // We need a stack frame large enough to hold the top-of-stack data, - // the function arguments+results, our caller's PC, our frame, - // a word for the return PC of the next call, and then the StackLimit bytes - // that must be available on entry to any function called from a function - // that did a stack check. If StackMin is enough, don't ask for a specific - // amount: then we can use the custom functions and save a few - // instructions. - if(StackTop + cursym->text->to.offset2 + PtrSize + autoffset + PtrSize + StackLimit >= StackMin) - p->from.offset = (autoffset+7) & ~7LL; - - arg = cursym->text->to.offset2; - if(arg == 1) // special marker for known 0 - arg = 0; - if(arg&3) - diag("misaligned argument size in stack split"); - p = appendp(p); // save arg size in AX - p->as = AMOVL; - p->to.type = D_AX; - p->from.type = D_CONST; - p->from.offset = arg; - - p = appendp(p); - p->as = ACALL; - p->to.type = D_BRANCH; - p->pcond = pmorestack; - p->to.sym = symmorestack; + if(!(p->from.scale & NOSPLIT)) { p = appendp(p); - p->as = AJMP; - p->to.type = D_BRANCH; - p->pcond = cursym->text->link; + p = load_g_cx(p); // load g into CX + p = stacksplit(p, autoffset, &q); // emit split check } - if(q != P) - q->pcond = p->link; - if(q1 != P) - q1->pcond = q->link; - if(autoffset) { p = appendp(p); p->as = AADJSP; @@ -761,6 +561,239 @@ dostkoff(void) } } +// Append code to p to load g into cx. +// Overwrites p with the first instruction (no first appendp). +// Overwriting p is unusual but it lets use this in both the +// prologue (caller must call appendp first) and in the epilogue. +// Returns last new instruction. +static Prog* +load_g_cx(Prog *p) +{ + switch(HEADTYPE) { + case Hwindows: + p->as = AMOVL; + p->from.type = D_INDIR+D_FS; + p->from.offset = 0x14; + p->to.type = D_CX; + + p = appendp(p); + p->as = AMOVL; + p->from.type = D_INDIR+D_CX; + p->from.offset = 0; + p->to.type = D_CX; + break; + + case Hlinux: + if(linkmode != LinkExternal) { + p->as = AMOVL; + p->from.type = D_INDIR+D_GS; + p->from.offset = 0; + p->to.type = D_CX; + + p = appendp(p); + p->as = AMOVL; + p->from.type = D_INDIR+D_CX; + p->from.offset = tlsoffset + 0; + p->to.type = D_CX; + } else { + p->as = AMOVL; + p->from.type = D_INDIR+D_GS; + p->from.offset = tlsoffset + 0; + p->to.type = D_CX; + p->from.index = D_GS; + p->from.scale = 1; + } + break; + + case Hplan9x32: + p->as = AMOVL; + p->from.type = D_EXTERN; + p->from.sym = plan9_tos; + p->to.type = D_CX; + + p = appendp(p); + p->as = AMOVL; + p->from.type = D_INDIR+D_CX; + p->from.offset = tlsoffset + 0; + p->to.type = D_CX; + break; + + default: + p->as = AMOVL; + p->from.type = D_INDIR+D_GS; + p->from.offset = tlsoffset + 0; + p->to.type = D_CX; + } + return p; +} + +// Append code to p to check for stack split. +// Appends to (does not overwrite) p. +// Assumes g is in CX. +// Returns last new instruction. +// On return, *jmpok is the instruction that should jump +// to the stack frame allocation if no split is needed. +static Prog* +stacksplit(Prog *p, int32 framesize, Prog **jmpok) +{ + Prog *q, *q1; + int arg; + + if(debug['K']) { + // 8l -K means check not only for stack + // overflow but stack underflow. + // On underflow, INT 3 (breakpoint). + // Underflow itself is rare but this also + // catches out-of-sync stack guard info. + p = appendp(p); + p->as = ACMPL; + p->from.type = D_INDIR+D_CX; + p->from.offset = 4; + p->to.type = D_SP; + + p = appendp(p); + p->as = AJCC; + p->to.type = D_BRANCH; + p->to.offset = 4; + q1 = p; + + p = appendp(p); + p->as = AINT; + p->from.type = D_CONST; + p->from.offset = 3; + + p = appendp(p); + p->as = ANOP; + q1->pcond = p; + } + q1 = P; + + if(framesize <= StackSmall) { + // small stack: SP <= stackguard + // CMPL SP, stackguard + p = appendp(p); + p->as = ACMPL; + p->from.type = D_SP; + p->to.type = D_INDIR+D_CX; + } else if(framesize <= StackBig) { + // large stack: SP-framesize <= stackguard-StackSmall + // LEAL -(framesize-StackSmall)(SP), AX + // CMPL AX, stackguard + p = appendp(p); + p->as = ALEAL; + p->from.type = D_INDIR+D_SP; + p->from.offset = -(framesize-StackSmall); + p->to.type = D_AX; + + p = appendp(p); + p->as = ACMPL; + p->from.type = D_AX; + p->to.type = D_INDIR+D_CX; + } else { + // Such a large stack we need to protect against wraparound + // if SP is close to zero. + // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) + // The +StackGuard on both sides is required to keep the left side positive: + // SP is allowed to be slightly below stackguard. See stack.h. + // + // Preemption sets stackguard to StackPreempt, a very large value. + // That breaks the math above, so we have to check for that explicitly. + // MOVL stackguard, CX + // CMPL CX, $StackPreempt + // JEQ label-of-call-to-morestack + // LEAL StackGuard(SP), AX + // SUBL stackguard, AX + // CMPL AX, $(framesize+(StackGuard-StackSmall)) + p = appendp(p); + p->as = AMOVL; + p->from.type = D_INDIR+D_CX; + p->from.offset = 0; + p->to.type = D_SI; + + p = appendp(p); + p->as = ACMPL; + p->from.type = D_SI; + p->to.type = D_CONST; + p->to.offset = (uint32)StackPreempt; + + p = appendp(p); + p->as = AJEQ; + p->to.type = D_BRANCH; + q1 = p; + + p = appendp(p); + p->as = ALEAL; + p->from.type = D_INDIR+D_SP; + p->from.offset = StackGuard; + p->to.type = D_AX; + + p = appendp(p); + p->as = ASUBL; + p->from.type = D_SI; + p->from.offset = 0; + p->to.type = D_AX; + + p = appendp(p); + p->as = ACMPL; + p->from.type = D_AX; + p->to.type = D_CONST; + p->to.offset = framesize+(StackGuard-StackSmall); + } + + // common + p = appendp(p); + p->as = AJHI; + p->to.type = D_BRANCH; + p->to.offset = 4; + q = p; + + p = appendp(p); // save frame size in DI + p->as = AMOVL; + p->to.type = D_DI; + p->from.type = D_CONST; + + // If we ask for more stack, we'll get a minimum of StackMin bytes. + // We need a stack frame large enough to hold the top-of-stack data, + // the function arguments+results, our caller's PC, our frame, + // a word for the return PC of the next call, and then the StackLimit bytes + // that must be available on entry to any function called from a function + // that did a stack check. If StackMin is enough, don't ask for a specific + // amount: then we can use the custom functions and save a few + // instructions. + if(StackTop + cursym->text->to.offset2 + PtrSize + framesize + PtrSize + StackLimit >= StackMin) + p->from.offset = (framesize+7) & ~7LL; + + arg = cursym->text->to.offset2; + if(arg == 1) // special marker for known 0 + arg = 0; + if(arg&3) + diag("misaligned argument size in stack split"); + p = appendp(p); // save arg size in AX + p->as = AMOVL; + p->to.type = D_AX; + p->from.type = D_CONST; + p->from.offset = arg; + + p = appendp(p); + p->as = ACALL; + p->to.type = D_BRANCH; + p->pcond = pmorestack; + p->to.sym = symmorestack; + + p = appendp(p); + p->as = AJMP; + p->to.type = D_BRANCH; + p->pcond = cursym->text->link; + + if(q != P) + q->pcond = p->link; + if(q1 != P) + q1->pcond = q->link; + + *jmpok = q; + return p; +} + int32 atolwhex(char *s) { -- 2.48.1