From 1552e62d70374f86627d7b845ee6effb38a2aebc Mon Sep 17 00:00:00 2001
From: Russ Cox
Date: Thu, 16 Oct 2014 12:43:17 -0400
Subject: [PATCH] cmd/gc: elide write barrier for x = x[0:y] and x = append(x, ...)

Both of these forms can avoid writing to the base pointer in x:
the slice form always, and the append form most of the time.

For Go 1.5, the compilation of x = x[0:y] will need to change to
avoid writing to the base pointer, so that the elision is safe,
and the compilation of x = append(x, ...) will need to change to
write to the base pointer (through a barrier) only when growing
the underlying array, so that the general elision is safe.

For Go 1.4, elide the write barrier always, a change that should
have equivalent performance characteristics but is much simpler
and therefore safer.

benchmark                          old ns/op     new ns/op     delta
BenchmarkBinaryTree17              3910526122    3918802545    +0.21%
BenchmarkFannkuch11                3747650699    3732600693    -0.40%
BenchmarkFmtFprintfEmpty           106           98.7          -6.89%
BenchmarkFmtFprintfString          280           269           -3.93%
BenchmarkFmtFprintfInt             296           282           -4.73%
BenchmarkFmtFprintfIntInt          467           470           +0.64%
BenchmarkFmtFprintfPrefixedInt     418           398           -4.78%
BenchmarkFmtFprintfFloat           574           535           -6.79%
BenchmarkFmtManyArgs               1768          1818          +2.83%
BenchmarkGobDecode                 14916799      14925182      +0.06%
BenchmarkGobEncode                 14110076      13358298      -5.33%
BenchmarkGzip                      546609795     542630402     -0.73%
BenchmarkGunzip                    136270657     136496277     +0.17%
BenchmarkHTTPClientServer          126574        125245        -1.05%
BenchmarkJSONEncode                30006238      27862354      -7.14%
BenchmarkJSONDecode                106020889     102664600     -3.17%
BenchmarkMandelbrot200             5793550       5818320       +0.43%
BenchmarkGoParse                   5437608       5463962       +0.48%
BenchmarkRegexpMatchEasy0_32       192           179           -6.77%
BenchmarkRegexpMatchEasy0_1K       462           460           -0.43%
BenchmarkRegexpMatchEasy1_32       168           153           -8.93%
BenchmarkRegexpMatchEasy1_1K       1420          1280          -9.86%
BenchmarkRegexpMatchMedium_32      338           286           -15.38%
BenchmarkRegexpMatchMedium_1K      107435        98027         -8.76%
BenchmarkRegexpMatchHard_32        5941          4846          -18.43%
BenchmarkRegexpMatchHard_1K        185965        153830        -17.28%
BenchmarkRevcomp                   795497458     798447829     +0.37%
BenchmarkTemplate                  132091559     134938425     +2.16%
BenchmarkTimeParse                 604           608           +0.66%
BenchmarkTimeFormat                551           548           -0.54%

LGTM=r
R=r, dave
CC=golang-codereviews, iant, khr, rlh
https://golang.org/cl/159960043
---
 src/cmd/gc/go.h        |  1 +
 src/cmd/gc/typecheck.c | 50 ++++++++++++++++++++++++++++++++++++++++++
 src/cmd/gc/walk.c      | 24 +++++++++++++++++++-
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index 475754145b..965a0550d3 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -283,6 +283,7 @@ struct Node
 	uchar	addrtaken;	// address taken, even if not moved to heap
 	uchar	dupok;	// duplicate definitions ok (for func)
 	uchar	wrapper;	// is method wrapper (for func)
+	uchar	reslice;	// this is a reslice x = x[0:y] or x = append(x, ...)
 	schar	likely;	// likeliness of if statement
 	uchar	hasbreak;	// has break statement
 	uchar	needzero;	// if it contains pointers, needs to be zeroed on function entry
diff --git a/src/cmd/gc/typecheck.c b/src/cmd/gc/typecheck.c
index ff49fe6f92..2ad8ab5bff 100644
--- a/src/cmd/gc/typecheck.c
+++ b/src/cmd/gc/typecheck.c
@@ -2814,6 +2814,33 @@ checkassignlist(NodeList *l)
 		checkassign(l->n);
 }
 
+// Check whether l and r are the same side effect-free expression,
+// so that it is safe to reuse one instead of computing both.
+static int
+samesafeexpr(Node *l, Node *r)
+{
+	if(l->op != r->op || !eqtype(l->type, r->type))
+		return 0;
+
+	switch(l->op) {
+	case ONAME:
+	case OCLOSUREVAR:
+		return l == r;
+
+	case ODOT:
+	case ODOTPTR:
+		return l->right != nil && r->right != nil && l->right->sym == r->right->sym && samesafeexpr(l->left, r->left);
+
+	case OIND:
+		return samesafeexpr(l->left, r->left);
+
+	case OINDEX:
+		return samesafeexpr(l->left, r->left) && samesafeexpr(l->right, r->right);
+	}
+
+	return 0;
+}
+
 /*
  * type check assignment.
  * if this assignment is the definition of a var on the left side,
@@ -2851,6 +2878,29 @@ typecheckas(Node *n)
 	n->typecheck = 1;
 	if(n->left->typecheck == 0)
 		typecheck(&n->left, Erv | Easgn);
+
+	// Recognize slices being updated in place, for better code generation later.
+	// Don't rewrite if using race detector, to avoid needing to teach race detector
+	// about this optimization.
+	if(n->left && n->left->op != OINDEXMAP && n->right && !flag_race) {
+		switch(n->right->op) {
+		case OSLICE:
+		case OSLICE3:
+		case OSLICESTR:
+			// For x = x[0:y], x can be updated in place, without touching pointer.
+			if(samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left)))
+				n->right->reslice = 1;
+			break;
+
+		case OAPPEND:
+			// For x = append(x, ...), x can be updated in place when there is capacity,
+			// without touching the pointer; otherwise the emitted code to growslice
+			// can take care of updating the pointer, and only in that case.
+			if(n->right->list != nil && samesafeexpr(n->left, n->right->list->n))
+				n->right->reslice = 1;
+			break;
+		}
+	}
 }
 
 static void
diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c
index 7f2748c668..7649728d37 100644
--- a/src/cmd/gc/walk.c
+++ b/src/cmd/gc/walk.c
@@ -1499,7 +1499,7 @@ fncall(Node *l, Type *rt)
 
 	if(l->ullman >= UINF || l->op == OINDEXMAP)
 		return 1;
-	r.op = 0;
+	memset(&r, 0, sizeof r);
 	if(needwritebarrier(l, &r))
 		return 1;
 	if(eqtype(l->type, rt))
@@ -2036,6 +2036,28 @@ needwritebarrier(Node *l, Node *r)
 	if(r->op == OADDR && isglobal(r->left))
 		return 0;
 
+	// No write barrier for reslice: x = x[0:y] or x = append(x, ...).
+	// Both are compiled to modify x directly.
+	// In the case of append, a write barrier may still be needed
+	// if the underlying array grows, but the append code can
+	// generate the write barrier directly in that case.
+	// (It does not yet, but the cost of the write barrier will be
+	// small compared to the cost of the allocation.)
+	if(r->reslice) {
+		switch(r->op) {
+		case OSLICE:
+		case OSLICE3:
+		case OSLICESTR:
+		case OAPPEND:
+			break;
+		default:
+			dump("bad reslice-l", l);
+			dump("bad reslice-r", r);
+			break;
+		}
+		return 0;
+	}
+
 	// Otherwise, be conservative and use write barrier.
 	return 1;
 }
-- 
2.48.1
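
For illustration only, not part of the patch: a minimal Go sketch of the assignment
forms the change above distinguishes. The package, variable, and function names are
invented for this example; the comments describe the Go 1.4 behavior introduced by
this CL, under which both reslice forms skip the write barrier while an ordinary
slice assignment still goes through the usual check.

package main

// x lives outside any stack frame, so assignments to it are the kind of
// slice-header stores that needwritebarrier in walk.c has to consider.
var x []int

func update(y int, z []int) {
	// x = x[0:y]: the base pointer cannot change, so the barrier is elided.
	x = x[0:y]

	// x = append(x, ...): updated in place while capacity lasts; this CL
	// elides the barrier here too and leaves the growing case to later work.
	x = append(x, 1, 2, 3)

	// x = z: may install a pointer to a different array, so this assignment
	// is not marked as a reslice and keeps the conservative treatment.
	x = z
}

func main() {
	x = make([]int, 4, 16)
	update(2, make([]int, 8))
}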