go test -race -run none -bench . encoding/json
benchmark old ns/op new ns/op delta
BenchmarkCodeEncoder 3207689000 1716149000 -46.50%
BenchmarkCodeMarshal 3206761000 1715677000 -46.50%
BenchmarkCodeDecoder 8647304000 4482709000 -48.16%
BenchmarkCodeUnmarshal 8032217000 3451248000 -57.03%
BenchmarkCodeUnmarshalReuse 8016722000 3480502000 -56.58%
BenchmarkSkipValue 10340453000 4560313000 -55.90%
benchmark old MB/s new MB/s speedup
BenchmarkCodeEncoder 0.60 1.13 1.88x
BenchmarkCodeMarshal 0.61 1.13 1.85x
BenchmarkCodeDecoder 0.22 0.43 1.95x
BenchmarkCodeUnmarshal 0.24 0.56 2.33x
BenchmarkCodeUnmarshalReuse 0.24 0.56 2.33x
BenchmarkSkipValue 0.19 0.44 2.32x
Fixes #4248.
R=dvyukov, golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6815066
"func @\"\".int64tofloat64(? int64) (? float64)\n"
"func @\"\".uint64tofloat64(? uint64) (? float64)\n"
"func @\"\".complex128div(@\"\".num complex128, @\"\".den complex128) (@\"\".quo complex128)\n"
- "func @\"\".racefuncenter()\n"
+ "func @\"\".racefuncenter(? uintptr)\n"
"func @\"\".racefuncexit()\n"
"func @\"\".raceread(? uintptr)\n"
"func @\"\".racewrite(? uintptr)\n"
{
int i;
Node *nd;
+ Node *nodpc;
char s[1024];
if(myimportpath) {
}
}
- // TODO(dvyukov): ideally this should be:
- // racefuncenter(getreturnaddress())
- // because it's much more costly to obtain from runtime library.
- nd = mkcall("racefuncenter", T, nil);
+ // nodpc is the PC of the caller as extracted by
+ // getcallerpc. We use -widthptr(FP) for x86.
+ // BUG: this will not work on arm.
+ nodpc = nod(OXXX, nil, nil);
+ *nodpc = *nodfp;
+ nodpc->type = types[TUINTPTR];
+ nodpc->xoffset = -widthptr;
+ nd = mkcall("racefuncenter", T, nil, nodpc);
fn->enter = list(fn->enter, nd);
nd = mkcall("racefuncexit", T, nil);
fn->exit = list(fn->exit, nd); // works fine if (!fn->exit)
func complex128div(num complex128, den complex128) (quo complex128)
// race detection
-func racefuncenter()
+func racefuncenter(uintptr)
func racefuncexit()
func raceread(uintptr)
func racewrite(uintptr)
// Called from instrumented code.
void
-runtime·racefuncenter(void)
+runtime·racefuncenter(uintptr pc)
{
- uintptr pc;
+ // If the caller PC is lessstack, use slower runtime·callers
+ // to walk across the stack split to find the real caller.
+ if(pc == (uintptr)runtime·lessstack)
+ runtime·callers(2, &pc, 1);
- runtime·callers(2, &pc, 1);
m->racecall = true;
runtime∕race·FuncEnter(g->goid-1, (void*)pc);
m->racecall = false;