Both of these forms can avoid writing to the base pointer in x
(in the slice, always, and in the append, most of the time).
For Go 1.5, will need to change the compilation of x = x[0:y]
to avoid writing to the base pointer, so that the elision is safe,
and will need to change the compilation of x = append(x, ...)
to write to the base pointer (through a barrier) only when
growing the underlying array, so that the general elision is safe.
For Go 1.4, elide the write barrier always, a change that should
have equivalent performance characteristics but is much
simpler and therefore safer.
benchmark old ns/op new ns/op delta
BenchmarkBinaryTree17
3910526122 3918802545 +0.21%
BenchmarkFannkuch11
3747650699 3732600693 -0.40%
BenchmarkFmtFprintfEmpty 106 98.7 -6.89%
BenchmarkFmtFprintfString 280 269 -3.93%
BenchmarkFmtFprintfInt 296 282 -4.73%
BenchmarkFmtFprintfIntInt 467 470 +0.64%
BenchmarkFmtFprintfPrefixedInt 418 398 -4.78%
BenchmarkFmtFprintfFloat 574 535 -6.79%
BenchmarkFmtManyArgs 1768 1818 +2.83%
BenchmarkGobDecode
14916799 14925182 +0.06%
BenchmarkGobEncode
14110076 13358298 -5.33%
BenchmarkGzip
546609795 542630402 -0.73%
BenchmarkGunzip
136270657 136496277 +0.17%
BenchmarkHTTPClientServer 126574 125245 -1.05%
BenchmarkJSONEncode
30006238 27862354 -7.14%
BenchmarkJSONDecode
106020889 102664600 -3.17%
BenchmarkMandelbrot200
5793550 5818320 +0.43%
BenchmarkGoParse
5437608 5463962 +0.48%
BenchmarkRegexpMatchEasy0_32 192 179 -6.77%
BenchmarkRegexpMatchEasy0_1K 462 460 -0.43%
BenchmarkRegexpMatchEasy1_32 168 153 -8.93%
BenchmarkRegexpMatchEasy1_1K 1420 1280 -9.86%
BenchmarkRegexpMatchMedium_32 338 286 -15.38%
BenchmarkRegexpMatchMedium_1K 107435 98027 -8.76%
BenchmarkRegexpMatchHard_32 5941 4846 -18.43%
BenchmarkRegexpMatchHard_1K 185965 153830 -17.28%
BenchmarkRevcomp
795497458 798447829 +0.37%
BenchmarkTemplate
132091559 134938425 +2.16%
BenchmarkTimeParse 604 608 +0.66%
BenchmarkTimeFormat 551 548 -0.54%
LGTM=r
R=r, dave
CC=golang-codereviews, iant, khr, rlh
https://golang.org/cl/
159960043
uchar addrtaken; // address taken, even if not moved to heap
uchar dupok; // duplicate definitions ok (for func)
uchar wrapper; // is method wrapper (for func)
+ uchar reslice; // this is a reslice x = x[0:y] or x = append(x, ...)
schar likely; // likeliness of if statement
uchar hasbreak; // has break statement
uchar needzero; // if it contains pointers, needs to be zeroed on function entry
checkassign(l->n);
}
+// Check whether l and r are the same side effect-free expression,
+// so that it is safe to reuse one instead of computing both.
+static int
+samesafeexpr(Node *l, Node *r)
+{
+ if(l->op != r->op || !eqtype(l->type, r->type))
+ return 0;
+
+ switch(l->op) {
+ case ONAME:
+ case OCLOSUREVAR:
+ return l == r;
+
+ case ODOT:
+ case ODOTPTR:
+ return l->right != nil && r->right != nil && l->right->sym == r->right->sym && samesafeexpr(l->left, r->left);
+
+ case OIND:
+ return samesafeexpr(l->left, r->left);
+
+ case OINDEX:
+ return samesafeexpr(l->left, r->left) && samesafeexpr(l->right, r->right);
+ }
+
+ return 0;
+}
+
/*
* type check assignment.
* if this assignment is the definition of a var on the left side,
n->typecheck = 1;
if(n->left->typecheck == 0)
typecheck(&n->left, Erv | Easgn);
+
+ // Recognize slices being updated in place, for better code generation later.
+ // Don't rewrite if using race detector, to avoid needing to teach race detector
+ // about this optimization.
+ if(n->left && n->left->op != OINDEXMAP && n->right && !flag_race) {
+ switch(n->right->op) {
+ case OSLICE:
+ case OSLICE3:
+ case OSLICESTR:
+ // For x = x[0:y], x can be updated in place, without touching pointer.
+ if(samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left)))
+ n->right->reslice = 1;
+ break;
+
+ case OAPPEND:
+ // For x = append(x, ...), x can be updated in place when there is capacity,
+ // without touching the pointer; otherwise the emitted code to growslice
+ // can take care of updating the pointer, and only in that case.
+ if(n->right->list != nil && samesafeexpr(n->left, n->right->list->n))
+ n->right->reslice = 1;
+ break;
+ }
+ }
}
static void
if(l->ullman >= UINF || l->op == OINDEXMAP)
return 1;
- r.op = 0;
+ memset(&r, 0, sizeof r);
if(needwritebarrier(l, &r))
return 1;
if(eqtype(l->type, rt))
if(r->op == OADDR && isglobal(r->left))
return 0;
+ // No write barrier for reslice: x = x[0:y] or x = append(x, ...).
+ // Both are compiled to modify x directly.
+ // In the case of append, a write barrier may still be needed
+ // if the underlying array grows, but the append code can
+ // generate the write barrier directly in that case.
+ // (It does not yet, but the cost of the write barrier will be
+ // small compared to the cost of the allocation.)
+ if(r->reslice) {
+ switch(r->op) {
+ case OSLICE:
+ case OSLICE3:
+ case OSLICESTR:
+ case OAPPEND:
+ break;
+ default:
+ dump("bad reslice-l", l);
+ dump("bad reslice-r", r);
+ break;
+ }
+ return 0;
+ }
+
// Otherwise, be conservative and use write barrier.
return 1;
}