My previous CL:
changeset: 9645:ce2e5f44b310
user: Russ Cox <rsc@golang.org>
date: Tue Sep 06 10:24:21 2011 -0400
summary: gc: unify stack frame layout
introduced a bug wherein no variables were
being registerized, making Go programs 2-3x
slower than they had been before.
This CL fixes that bug (along with some others
it was hiding) and adds a test verifying that
optimization makes at least one test case faster.
R=ken2
CC=golang-dev
https://golang.org/cl/5174045
a->offset = widthptr+4; // skip header
a->reg = NREG;
a->sym = sym;
+ a->node = sym->def;
}
/*
a->offset = 0; // header
a->reg = NREG;
a->sym = sym;
+ a->node = sym->def;
}
void
fatal("nodarg: offset not computed for %T", t);
n->xoffset = t->width;
n->addable = 1;
+ n->orig = t->nname;
fp:
switch(fp) {
a->sym = n->left->sym;
a->type = D_OREG;
a->name = D_PARAM;
+ a->node = n->left->orig;
break;
case ONAME:
}
a->offset = n->xoffset;
a->sym = n->sym;
+ a->node = n->orig;
+ //if(a->node >= (Node*)&n)
+ // fatal("stack node");
if(a->sym == S)
a->sym = lookup(".noname");
if(n->method) {
break;
case PAUTO:
a->name = D_AUTO;
- if (n->sym)
- a->node = n->orig;
break;
case PPARAM:
case PPARAMOUT:
ovar.b[z] |= bit.b[z];
t = structnext(&save);
}
-//if(bany(b))
-//print("ovars = %Q\n", &ovar);
+//if(bany(ovar))
+//print("ovar = %Q\n", ovar);
}
void
}
node = a->node;
- if(node == N || node->op != ONAME || node->orig != N)
+ if(node == N || node->op != ONAME || node->orig == N)
goto none;
node = node->orig;
- if(node->sym->name[0] == '.')
+ if(node->orig != node)
+ fatal("%D: bad node", a);
+ if(node->sym == S || node->sym->name[0] == '.')
goto none;
et = a->etype;
o = a->offset;
if(bany(&r->refahead))
print(" ra:%Q ", r->refahead);
if(bany(&r->calbehind))
- print("cb:%Q ", r->calbehind);
+ print(" cb:%Q ", r->calbehind);
if(bany(&r->calahead))
print(" ca:%Q ", r->calahead);
if(bany(&r->regdiff))
sym = stringsym(s, len);
a->type = D_EXTERN;
a->sym = sym;
+ a->node = sym->def;
a->offset = widthptr+4; // skip header
a->etype = TINT32;
}
sym = stringsym(sval->s, sval->len);
a->type = D_EXTERN;
a->sym = sym;
+ a->node = sym->def;
a->offset = 0; // header
a->etype = TINT32;
}
fatal("nodarg: offset not computed for %T", t);
n->xoffset = t->width;
n->addable = 1;
+ n->orig = t->nname;
fp:
switch(fp) {
a->offset = n->xoffset;
a->sym = n->left->sym;
a->type = D_PARAM;
+ a->node = n->left->orig;
break;
case ONAME:
}
a->offset = n->xoffset;
a->sym = n->sym;
+ a->node = n->orig;
+ //if(a->node >= (Node*)&n)
+ // fatal("stack node");
if(a->sym == S)
a->sym = lookup(".noname");
if(n->method) {
break;
case PAUTO:
a->type = D_AUTO;
- if (n->sym)
- a->node = n->orig;
break;
case PPARAM:
case PPARAMOUT:
ovar.b[z] |= bit.b[z];
t = structnext(&save);
}
-//if(bany(b))
-//print("ovars = %Q\n", &ovar);
+//if(bany(&ovar))
+//print("ovars = %Q\n", ovar);
}
static void
n = t;
break;
}
+
node = a->node;
- if(node == N || node->op != ONAME || node->orig != N)
+ if(node == N || node->op != ONAME || node->orig == N)
goto none;
node = node->orig;
- if(node->sym->name[0] == '.')
+ if(node->orig != node)
+ fatal("%D: bad node", a);
+ if(node->sym == S || node->sym->name[0] == '.')
goto none;
et = a->etype;
o = a->offset;
if(bany(&r->refahead))
print(" ra:%Q ", r->refahead);
if(bany(&r->calbehind))
- print("cb:%Q ", r->calbehind);
+ print(" cb:%Q ", r->calbehind);
if(bany(&r->calahead))
print(" ca:%Q ", r->calahead);
if(bany(&r->regdiff))
sym = stringsym(s, len);
a->type = D_EXTERN;
a->sym = sym;
+ a->node = sym->def;
a->offset = widthptr+4; // skip header
a->etype = TINT32;
}
sym = stringsym(sval->s, sval->len);
a->type = D_EXTERN;
a->sym = sym;
+ a->node = sym->def;
a->offset = 0; // header
a->etype = TINT32;
}
fatal("nodarg: offset not computed for %T", t);
n->xoffset = t->width;
n->addable = 1;
+ n->orig = t->nname;
break;
}
strcpy(namebuf, n->sym->name);
namebuf[0] = '.'; // keep optimizer from registerizing
n->sym = lookup(namebuf);
+ n->orig->sym = n->sym;
}
void
a->offset = n->xoffset;
a->sym = n->left->sym;
a->type = D_PARAM;
+ a->node = n->left->orig;
break;
case ONAME:
}
a->offset = n->xoffset;
a->sym = n->sym;
+ a->node = n->orig;
+ //if(a->node >= (Node*)&n)
+ // fatal("stack node");
if(a->sym == S)
a->sym = lookup(".noname");
if(n->method) {
break;
case PAUTO:
a->type = D_AUTO;
- if (n->sym)
- a->node = n->orig;
break;
case PPARAM:
case PPARAMOUT:
ovar.b[z] |= bit.b[z];
t = structnext(&save);
}
-//if(bany(b))
-//print("ovars = %Q\n", &ovar);
+//if(bany(ovar))
+//print("ovars = %Q\n", ovar);
}
static void
}
node = a->node;
- if(node == N || node->op != ONAME || node->orig != N)
+ if(node == N || node->op != ONAME || node->orig == N)
goto none;
node = node->orig;
- if(node->sym->name[0] == '.')
+ if(node->orig != node)
+ fatal("%D: bad node", a);
+ if(node->sym == S || node->sym->name[0] == '.')
goto none;
et = a->etype;
o = a->offset;
if(bany(&r->refahead))
print(" ra:%Q ", r->refahead);
if(bany(&r->calbehind))
- print("cb:%Q ", r->calbehind);
+ print(" cb:%Q ", r->calbehind);
if(bany(&r->calahead))
print(" ca:%Q ", r->calahead);
if(bany(&r->regdiff))
else
fmtprint(fp, " ");
if(var[i].node == N || var[i].node->sym == S)
- fmtprint(fp, "$%lld", var[i].offset);
+ fmtprint(fp, "$%lld", i);
else {
- fmtprint(fp, var[i].node->sym->name);
+ fmtprint(fp, "%s", var[i].node->sym->name);
if(var[i].offset != 0)
fmtprint(fp, "%+lld", (vlong)var[i].offset);
}
s = lookup(namebuf);
n = nod(ONAME, N, N);
n->sym = s;
+ s->def = n;
n->type = t;
n->class = PAUTO;
n->addable = 1;
if(sym->flags & SymUniq)
return sym;
sym->flags |= SymUniq;
+ sym->def = newname(sym);
off = 0;
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkg
+
+func NonASCII(b []byte, i int) int {
+ for i = 0; i < len(b); i++ {
+ if b[i] >= 0x80 {
+ break
+ }
+ }
+ return i
+}
+
--- /dev/null
+// $G -N -o slow.$A $D/bug369.dir/pkg.go &&
+// $G -o fast.$A $D/bug369.dir/pkg.go &&
+// $G $D/$F.go && $L $F.$A && ./$A.out
+
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that compiling with optimization turned on produces faster code.
+
+package main
+
+import (
+ "flag"
+ "os"
+ "runtime"
+ "testing"
+
+ fast "./fast"
+ slow "./slow"
+)
+
+var buf = make([]byte, 1048576)
+
+func BenchmarkFastNonASCII(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ fast.NonASCII(buf, 0)
+ }
+}
+
+func BenchmarkSlowNonASCII(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ slow.NonASCII(buf, 0)
+ }
+}
+
+func main() {
+ os.Args = []string{os.Args[0], "-test.benchtime=0.1"}
+ flag.Parse()
+
+ rslow := testing.Benchmark(BenchmarkSlowNonASCII)
+ rfast := testing.Benchmark(BenchmarkFastNonASCII)
+ tslow := rslow.NsPerOp()
+ tfast := rfast.NsPerOp()
+
+ // Optimization should be good for at least 2x, but be forgiving.
+ // On the ARM simulator we see closer to 1.5x.
+ speedup := float64(tslow)/float64(tfast)
+ want := 1.8
+ if runtime.GOARCH == "arm" {
+ want = 1.3
+ }
+ if speedup < want {
+ println("fast:", tfast, "slow:", tslow, "speedup:", speedup, "want:", want)
+ println("not fast enough")
+ }
+}