From f91e682cca2eb51a0a8b1511678a5e2b4d8a83de Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 8 Aug 2013 16:38:02 -0400 Subject: [PATCH] cmd/gc: make bitmaps shorter Sort non-pointer-containing data to the low end of the stack frame, and make the bitmaps only cover the pointer-containing top end. Generates significantly less garbage collection bitmap for programs with large byte buffers on the stack. Only 2% shorter for godoc, but 99.99998% shorter in some test cases. Fixes arm build. TBR=golang-dev CC=cshapiro, golang-dev, iant https://golang.org/cl/12541047 --- src/cmd/gc/go.h | 2 ++ src/cmd/gc/pgen.c | 50 +++++++++++++++++++++++++++++++++++--------- src/cmd/gc/reflect.c | 34 ++++++++++++++++++++++-------- 3 files changed, 67 insertions(+), 19 deletions(-) diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 26ffabc689..05c864a1c5 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -153,6 +153,7 @@ struct Type uchar broke; // broken type definition. uchar isddd; // TFIELD is ... argument uchar align; + uchar haspointers; // 0 unknown, 1 no, 2 yes Node* nod; // canonical OTYPE node Type* orig; // original type (type literal or predefined type) @@ -937,6 +938,7 @@ EXTERN NodeList* lastconst; EXTERN Node* lasttype; EXTERN vlong maxarg; EXTERN vlong stksize; // stack size for current frame +EXTERN vlong stkptrsize; // prefix of stack containing pointers for current frame EXTERN int32 blockgen; // max block number EXTERN int32 block; // current block number EXTERN int hasdefer; // flag that curfn has defer statetment diff --git a/src/cmd/gc/pgen.c b/src/cmd/gc/pgen.c index 237314ea1c..ad005a8f92 100644 --- a/src/cmd/gc/pgen.c +++ b/src/cmd/gc/pgen.c @@ -336,12 +336,12 @@ dumpgclocals(Node* fn, Sym *sym) int32 i; int off; - bv = bvalloc(rnd(stksize, widthptr) / widthptr); + bv = bvalloc(stkptrsize / widthptr); for(ll = fn->dcl; ll != nil; ll = ll->next) { node = ll->n; if(node->class == PAUTO && node->op == ONAME) { if(haspointers(node->type)) { - xoffset = node->xoffset + rnd(stksize,widthptr); + xoffset = node->xoffset + stksize; walktype1(node->type, &xoffset, bv); } } @@ -354,25 +354,40 @@ dumpgclocals(Node* fn, Sym *sym) ggloblsym(sym, off, 0, 1); } -// Sort the list of stack variables. autos after anything else, -// within autos, unused after used, and within used on reverse alignment. -// non-autos sort on offset. +// Sort the list of stack variables. Autos after anything else, +// within autos, unused after used, within used, things with +// pointers first, and then decreasing size. +// Because autos are laid out in decreasing addresses +// on the stack, pointers first and decreasing size +// really means, in memory, pointers near the top of the +// stack and increasing in size. +// Non-autos sort on offset. static int cmpstackvar(Node *a, Node *b) { + int ap, bp; + if (a->class != b->class) - return (a->class == PAUTO) ? 1 : -1; + return (a->class == PAUTO) ? +1 : -1; if (a->class != PAUTO) { if (a->xoffset < b->xoffset) return -1; if (a->xoffset > b->xoffset) - return 1; + return +1; return 0; } if ((a->used == 0) != (b->used == 0)) return b->used - a->used; - return b->type->align - a->type->align; + ap = haspointers(a->type); + bp = haspointers(b->type); + if(ap != bp) + return bp - ap; + if(a->type->width < b->type->width) + return +1; + if(a->type->width > b->type->width) + return -1; + return 0; } // TODO(lvd) find out where the PAUTO/OLITERAL nodes come from. @@ -382,9 +397,13 @@ allocauto(Prog* ptxt) NodeList *ll; Node* n; vlong w; + vlong ptrlimit; - if(curfn->dcl == nil) + if(curfn->dcl == nil) { + stksize = 0; + stkptrsize = 0; return; + } // Mark the PAUTO's unused. for(ll=curfn->dcl; ll != nil; ll=ll->next) @@ -402,6 +421,7 @@ allocauto(Prog* ptxt) // No locals used at all curfn->dcl = nil; stksize = 0; + stkptrsize = 0; fixautoused(ptxt); return; } @@ -417,6 +437,7 @@ allocauto(Prog* ptxt) // Reassign stack offsets of the locals that are still there. stksize = 0; + ptrlimit = -1; for(ll = curfn->dcl; ll != nil; ll=ll->next) { n = ll->n; if (n->class != PAUTO || n->op != ONAME) @@ -428,6 +449,8 @@ allocauto(Prog* ptxt) fatal("bad width"); stksize += w; stksize = rnd(stksize, n->type->align); + if(ptrlimit < 0 && haspointers(n->type)) + ptrlimit = stksize - w; if(thechar == '5') stksize = rnd(stksize, widthptr); if(stksize >= (1ULL<<31)) { @@ -436,11 +459,18 @@ allocauto(Prog* ptxt) } n->stkdelta = -stksize - n->xoffset; } + stksize = rnd(stksize, widthptr); + + if(ptrlimit < 0) + stkptrsize = 0; + else + stkptrsize = stksize - ptrlimit; + stkptrsize = rnd(stkptrsize, widthptr); fixautoused(ptxt); // The debug information needs accurate offsets on the symbols. - for(ll = curfn->dcl ;ll != nil; ll=ll->next) { + for(ll = curfn->dcl; ll != nil; ll=ll->next) { if (ll->n->class != PAUTO || ll->n->op != ONAME) continue; ll->n->xoffset += ll->n->stkdelta; diff --git a/src/cmd/gc/reflect.c b/src/cmd/gc/reflect.c index e74df2a8eb..9f5f80b281 100644 --- a/src/cmd/gc/reflect.c +++ b/src/cmd/gc/reflect.c @@ -459,6 +459,10 @@ int haspointers(Type *t) { Type *t1; + int ret; + + if(t->haspointers != 0) + return t->haspointers - 1; switch(t->etype) { case TINT: @@ -477,16 +481,24 @@ haspointers(Type *t) case TCOMPLEX64: case TCOMPLEX128: case TBOOL: - return 0; + ret = 0; + break; case TARRAY: - if(t->bound < 0) // slice - return 1; - return haspointers(t->type); + if(t->bound < 0) { // slice + ret = 1; + break; + } + ret = haspointers(t->type); + break; case TSTRUCT: - for(t1=t->type; t1!=T; t1=t1->down) - if(haspointers(t1->type)) - return 1; - return 0; + ret = 0; + for(t1=t->type; t1!=T; t1=t1->down) { + if(haspointers(t1->type)) { + ret = 1; + break; + } + } + break; case TSTRING: case TPTR32: case TPTR64: @@ -496,8 +508,12 @@ haspointers(Type *t) case TMAP: case TFUNC: default: - return 1; + ret = 1; + break; } + + t->haspointers = 1+ret; + return ret; } /* -- 2.48.1