On entry to a function, zero the results and zero the pointer
section of the local variables.
This is an intermediate step on the way to precise collection
of Go frames.
This can incur a significant (up to 30%) slowdown, but it also ensures
that the garbage collector never looks at a word in a Go frame
and sees a stale pointer value that could cause a space leak.
(C frames and assembly frames are still possibly problematic.)
This CL is required to start making collection of interface values
as precise as collection of pointer values is today.
Since we have to dereference the interface type to understand
whether the value is a pointer, it is critical that the type field be
initialized.
A future CL by Carl will make the garbage collection pointer
bitmaps context-sensitive. At that point it will be possible to
remove most of the zeroing. The only values that will still need
zeroing are values whose addresses escape the block scoping
of the function but do not escape to the heap.
benchmark old ns/op new ns/op delta
BenchmarkBinaryTree17 4420289180 4331060459 -2.02%
BenchmarkFannkuch11 3442469663 3277706251 -4.79%
BenchmarkFmtFprintfEmpty 100 142 +42.00%
BenchmarkFmtFprintfString 262 310 +18.32%
BenchmarkFmtFprintfInt 213 281 +31.92%
BenchmarkFmtFprintfIntInt 355 431 +21.41%
BenchmarkFmtFprintfPrefixedInt 321 383 +19.31%
BenchmarkFmtFprintfFloat 444 533 +20.05%
BenchmarkFmtManyArgs 1380 1559 +12.97%
BenchmarkGobDecode 10240054 11794915 +15.18%
BenchmarkGobEncode 17350274 19970478 +15.10%
BenchmarkGzip 455179460 460699139 +1.21%
BenchmarkGunzip 114271814 119291574 +4.39%
BenchmarkHTTPClientServer 89051 89894 +0.95%
BenchmarkJSONEncode 40486799 52691558 +30.15%
BenchmarkJSONDecode 94193361 112428781 +19.36%
BenchmarkMandelbrot200 4747060 4748043 +0.02%
BenchmarkGoParse 6363798 6675098 +4.89%
BenchmarkRegexpMatchEasy0_32 129 171 +32.56%
BenchmarkRegexpMatchEasy0_1K 365 395 +8.22%
BenchmarkRegexpMatchEasy1_32 106 152 +43.40%
BenchmarkRegexpMatchEasy1_1K 952 1245 +30.78%
BenchmarkRegexpMatchMedium_32 198 283 +42.93%
BenchmarkRegexpMatchMedium_1K 79006 101097 +27.96%
BenchmarkRegexpMatchHard_32 3478 5115 +47.07%
BenchmarkRegexpMatchHard_1K 110245 163582 +48.38%
BenchmarkRevcomp 777384355 793270857 +2.04%
BenchmarkTemplate 136713089 157093609 +14.91%
BenchmarkTimeParse 1511 1761 +16.55%
BenchmarkTimeFormat 535 850 +58.88%
benchmark old MB/s new MB/s speedup
BenchmarkGobDecode 74.95 65.07 0.87x
BenchmarkGobEncode 44.24 38.43 0.87x
BenchmarkGzip 42.63 42.12 0.99x
BenchmarkGunzip 169.81 162.67 0.96x
BenchmarkJSONEncode 47.93 36.83 0.77x
BenchmarkJSONDecode 20.60 17.26 0.84x
BenchmarkGoParse 9.10 8.68 0.95x
BenchmarkRegexpMatchEasy0_32 247.24 186.31 0.75x
BenchmarkRegexpMatchEasy0_1K 2799.20 2591.93 0.93x
BenchmarkRegexpMatchEasy1_32 299.31 210.44 0.70x
BenchmarkRegexpMatchEasy1_1K 1074.71 822.45 0.77x
BenchmarkRegexpMatchMedium_32 5.04 3.53 0.70x
BenchmarkRegexpMatchMedium_1K 12.96 10.13 0.78x
BenchmarkRegexpMatchHard_32 9.20 6.26 0.68x
BenchmarkRegexpMatchHard_1K 9.29 6.26 0.67x
BenchmarkRevcomp 326.95 320.40 0.98x
BenchmarkTemplate 14.19 12.35 0.87x
R=cshapiro
CC=golang-dev
https://golang.org/cl/12616045
#include "gg.h"
#include "opt.h"
+static Prog* appendp(Prog*, int, int, int, int32, int, int, int32);
+
// defframe finalizes the function header instruction ptxt: it records the
// rounded argument size and the rounded frame size, then splices in
// instructions after ptxt that zero a stkptrsize-byte region of the frame
// starting at offset 4+frame-stkptrsize from REGSP. bv is consulted only
// on the small-frame path, where it selects which words get a store.
void
-defframe(Prog *ptxt)
+defframe(Prog *ptxt, Bvec *bv)
{
+ int i, first;
+ uint32 frame;
+ Prog *p, *p1;
+
// fill in argument size
ptxt->to.type = D_CONST2;
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
// fill in final stack size
if(stksize > maxstksize)
maxstksize = stksize;
- ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
+ frame = rnd(maxstksize+maxarg, widthptr); // kept in a local because the zeroing offsets below are computed from it
+ ptxt->to.offset = frame;
maxstksize = 0;
+
+ // insert code to clear pointered part of the frame,
+ // so that garbage collector only sees initialized values
+ // when it looks for pointers.
+ p = ptxt;
+ while(p->link->as == AFUNCDATA || p->link->as == APCDATA || p->link->as == ATYPE) // step past FUNCDATA/PCDATA/TYPE entries so the new code lands after them
+ p = p->link;
+ if(stkptrsize >= 8*widthptr) { // 8 or more words: emit a store loop rather than one store per word
+ p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0); // register 0 = constant 0, the value that gets stored
+ p = appendp(p, AADD, D_CONST, NREG, 4+frame-stkptrsize, D_REG, 1, 0); // presumably register 1 = REGSP + (4+frame-stkptrsize); middle operand is set on the next line
+ p->reg = REGSP;
+ p = appendp(p, AADD, D_CONST, NREG, stkptrsize, D_REG, 2, 0); // presumably register 2 = register 1 + stkptrsize, the loop limit
+ p->reg = 1;
+ p1 = p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4); // loop head, remembered in p1: store register 0 through register 1 with offset 4
+ p->scond |= C_PBIT; // NOTE(review): presumably makes the store advance register 1 by the offset — confirm against the assembler
+ p = appendp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0); // compare register 1 with register 2 (second operand set on the next line)
+ p->reg = 2;
+ p = appendp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0); // branch while the two differ; target is filled in by patch below
+ patch(p, p1); // point the branch back at the store
+ } else {
+ first = 1; // the zero register is loaded lazily, only if at least one store is emitted
+ for(i=0; i<stkptrsize; i+=widthptr) {
+ if(bvget(bv, i/widthptr)) { // only words whose bit is set in bv get a store
+ if(first) {
+ p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0); // register 0 = constant 0, emitted once
+ first = 0;
+ }
+ p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame-stkptrsize+i); // store register 0 at REGSP + 4+frame-stkptrsize+i
+ }
+ }
+ }
+}
+
// appendp allocates a new instruction with opcode `as`, fills in the
// type, register and offset of its from and to operands from the
// arguments, gives it p's line number, links it into the list
// immediately after p, and returns it so that calls can be chained.
+static Prog*
+appendp(Prog *p, int as, int ftype, int freg, int32 foffset, int ttype, int treg, int32 toffset)
+{
+ Prog *q;
+
+ q = mal(sizeof(*q));
+ clearp(q); // reset every field before the selected ones are filled in
+ q->as = as;
+ q->lineno = p->lineno; // inherit the line number of the instruction it follows
+ q->from.type = ftype;
+ q->from.reg = freg;
+ q->from.offset = foffset;
+ q->to.type = ttype;
+ q->to.reg = treg;
+ q->to.offset = toffset;
+ q->link = p->link; // splice q between p and p's old successor
+ p->link = q;
+ return q;
}
// Sweep the prog list to mark any used nodes.
#include "gg.h"
#include "opt.h"
+static Prog* appendp(Prog*, int, int, vlong, int, vlong);
+
// defframe finalizes the function header instruction ptxt: it packs the
// rounded argument size into the upper 32 bits of ptxt->to.offset and
// the rounded frame size into the lower bits, then splices in
// instructions after ptxt that zero a stkptrsize-byte region of the
// frame starting at offset frame-stkptrsize from SP. bv is consulted
// only on the small-frame path, where it selects which words get a store.
void
-defframe(Prog *ptxt)
+defframe(Prog *ptxt, Bvec *bv)
{
+ int i;
+ uint32 frame;
+ Prog *p;
+
// fill in argument size
ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
// fill in final stack size
ptxt->to.offset <<= 32;
- ptxt->to.offset |= rnd(stksize+maxarg, widthptr);
+ frame = rnd(stksize+maxarg, widthptr); // kept in a local because the zeroing offsets below are computed from it
+ ptxt->to.offset |= frame;
+
+ // insert code to clear pointered part of the frame,
+ // so that garbage collector only sees initialized values
+ // when it looks for pointers.
+ p = ptxt; // NOTE(review): unlike the AMOVW variant of defframe in this change, no AFUNCDATA/APCDATA/ATYPE entries are skipped here — confirm that is intended
+ if(stkptrsize >= 8*widthptr) { // 8 or more words: use a repeated string store rather than one store per word
+ p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0); // AX = 0, the value that gets stored
+ p = appendp(p, AMOVQ, D_CONST, stkptrsize/widthptr, D_CX, 0); // CX = number of words to clear
+ p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0); // DI = address at offset frame-stkptrsize from SP
+ p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); // REP prefix applies to the STOSQ that follows
+ appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); // last inserted instruction, so the returned Prog is not kept
+ } else {
+ for(i=0; i<stkptrsize; i+=widthptr)
+ if(bvget(bv, i/widthptr)) // only words whose bit is set in bv get a store
+ p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkptrsize+i); // store constant 0 at offset frame-stkptrsize+i from SP
+ }
+}
+
// appendp allocates a new instruction with opcode `as`, fills in the
// type and offset of its from and to operands from the arguments, gives
// it p's line number, links it into the list immediately after p, and
// returns it so that calls can be chained.
+static Prog*
+appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
+{
+ Prog *q;
+
+ q = mal(sizeof(*q));
+ clearp(q); // reset every field before the selected ones are filled in
+ q->as = as;
+ q->lineno = p->lineno; // inherit the line number of the instruction it follows
+ q->from.type = ftype;
+ q->from.offset = foffset;
+ q->to.type = ttype;
+ q->to.offset = toffset;
+ q->link = p->link; // splice q between p and p's old successor
+ p->link = q;
+ return q;
}
// Sweep the prog list to mark any used nodes.
#include "gg.h"
#include "opt.h"
+static Prog* appendp(Prog*, int, int, int32, int, int32);
+
// defframe finalizes the function header instruction ptxt: it records the
// rounded argument size in to.offset2 and the rounded frame size in
// to.offset, then splices in instructions after ptxt that zero a
// stkptrsize-byte region of the frame starting at offset
// frame-stkptrsize from SP. bv is consulted only on the small-frame
// path, where it selects which words get a store.
void
-defframe(Prog *ptxt)
+defframe(Prog *ptxt, Bvec *bv)
{
+ uint32 frame;
+ Prog *p;
+ int i;
+
// fill in argument size
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
// fill in final stack size
if(stksize > maxstksize)
maxstksize = stksize;
- ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
+ frame = rnd(maxstksize+maxarg, widthptr); // kept in a local because the zeroing offsets below are computed from it
+ ptxt->to.offset = frame;
maxstksize = 0;
+
+ // insert code to clear pointered part of the frame,
+ // so that garbage collector only sees initialized values
+ // when it looks for pointers.
+ p = ptxt; // NOTE(review): unlike the AMOVW variant of defframe in this change, no AFUNCDATA/APCDATA/ATYPE entries are skipped here — confirm that is intended
+ if(stkptrsize >= 8*widthptr) { // 8 or more words: use a repeated string store rather than one store per word
+ p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0); // AX = 0, the value that gets stored
+ p = appendp(p, AMOVL, D_CONST, stkptrsize/widthptr, D_CX, 0); // CX = number of words to clear
+ p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0); // DI = address at offset frame-stkptrsize from SP
+ p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); // REP prefix applies to the STOSL that follows
+ appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0); // last inserted instruction, so the returned Prog is not kept
+ } else {
+ for(i=0; i<stkptrsize; i+=widthptr)
+ if(bvget(bv, i/widthptr)) // only words whose bit is set in bv get a store
+ p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkptrsize+i); // store constant 0 at offset frame-stkptrsize+i from SP
+ }
+}
+
// appendp allocates a new instruction with opcode `as`, fills in the
// type and offset of its from and to operands from the arguments, gives
// it p's line number, links it into the list immediately after p, and
// returns it so that calls can be chained.
+static Prog*
+appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset)
+{
+ Prog *q;
+
+ q = mal(sizeof(*q));
+ clearp(q); // reset every field before the selected ones are filled in
+ q->as = as;
+ q->lineno = p->lineno; // inherit the line number of the instruction it follows
+ q->from.type = ftype;
+ q->from.offset = foffset;
+ q->to.type = ttype;
+ q->to.offset = toffset;
+ q->link = p->link; // splice q between p and p's old successor
+ p->link = q;
+ return q;
}
// Sweep the prog list to mark any used nodes.
void cgen_ret(Node *n);
void clearfat(Node *n);
void compile(Node*);
-void defframe(Prog*);
+void defframe(Prog*, Bvec*);
int dgostringptr(Sym*, int off, char *str);
int dgostrlitptr(Sym*, int off, Strlit*);
int dstringptr(Sym *s, int off, char *str);
static void allocauto(Prog* p);
static void dumpgcargs(Node*, Sym*);
-static void dumpgclocals(Node*, Sym*);
+static Bvec* dumpgclocals(Node*, Sym*);
void
compile(Node *fn)
{
+ Bvec *bv;
Plist *pl;
Node nod1, *n, *gcargsnod, *gclocalsnod;
Prog *ptxt, *p, *p1;
goto ret;
}
- defframe(ptxt);
+ // Emit garbage collection symbols.
+ dumpgcargs(fn, gcargssym);
+ bv = dumpgclocals(curfn, gclocalssym);
+
+ defframe(ptxt, bv);
+ free(bv);
if(0)
frame(0);
- // Emit garbage collection symbols.
- dumpgcargs(fn, gcargssym);
- dumpgclocals(curfn, gclocalssym);
-
ret:
lineno = lno;
}
// Compute a bit vector that describes the pointer-containing locations
// in local variables and dumps the bitvector length and data out to
-// the provided symbol.
-static void
+// the provided symbol. Returns the vector for use and freeing by caller.
+static Bvec*
dumpgclocals(Node* fn, Sym *sym)
{
Bvec *bv;
for(i = 0; i < bv->n; i += 32) {
off = duint32(sym, off, bv->b[i/32]);
}
- free(bv);
ggloblsym(sym, off, 0, 1);
+ return bv;
}
// Sort the list of stack variables. Autos after anything else,
v = t->nname;
if(v && v->sym && v->sym->name[0] == '~')
v = N;
- if(v == N && out && hasdefer) {
+ // The garbage collector assumes results are always live,
+ // so zero them always (1 ||).
+ if(out && (1 || (v == N && hasdefer))) {
// Defer might stop a panic and show the
// return values as they exist at the time of panic.
// Make sure to zero them on entry to the function.