]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/gc: optimize memclr of slices and arrays
authorJosh Bleecher Snyder <josharian@gmail.com>
Thu, 8 Jan 2015 01:44:49 +0000 (17:44 -0800)
committerJosh Bleecher Snyder <josharian@gmail.com>
Fri, 9 Jan 2015 22:35:25 +0000 (22:35 +0000)
Recognize loops of the form

for i := range a {
a[i] = zero
}

in which the evaluation of a is free from side effects.
Replace these loops with calls to memclr.
This occurs in the stdlib in 18 places.

The motivating example is clearing a byte slice:

benchmark                old ns/op     new ns/op     delta
BenchmarkGoMemclr5       3.31          3.26          -1.51%
BenchmarkGoMemclr16      13.7          3.28          -76.06%
BenchmarkGoMemclr64      50.8          4.14          -91.85%
BenchmarkGoMemclr256     157           6.02          -96.17%

Update #5373.

Change-Id: I99d3e6f5f268e8c6499b7e661df46403e5eb83e4
Reviewed-on: https://go-review.googlesource.com/2520
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/gc/builtin.c
src/cmd/gc/go.h
src/cmd/gc/range.c
src/cmd/gc/runtime.go
src/cmd/gc/typecheck.c
src/runtime/memmove_test.go
test/fixedbugs/issue5373.go [new file with mode: 0644]

index b11aa1e6f9dbfd2a0c9ba5c33a335f6eb96cf1ef..e2e14f03d2b887e1f1f64f64b3465e7f3e9b3768 100644 (file)
@@ -126,6 +126,7 @@ char *runtimeimport =
        "func @\"\".makeslice (@\"\".typ·2 *byte, @\"\".nel·3 int64, @\"\".cap·4 int64) (@\"\".ary·1 []any)\n"
        "func @\"\".growslice (@\"\".typ·2 *byte, @\"\".old·3 []any, @\"\".n·4 int64) (@\"\".ary·1 []any)\n"
        "func @\"\".memmove (@\"\".to·1 *any, @\"\".frm·2 *any, @\"\".length·3 uintptr)\n"
+       "func @\"\".memclr (@\"\".ptr·1 *byte, @\"\".length·2 uintptr)\n"
        "func @\"\".memequal (@\"\".x·2 *any, @\"\".y·3 *any, @\"\".size·4 uintptr) (? bool)\n"
        "func @\"\".memequal8 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
        "func @\"\".memequal16 (@\"\".x·2 *any, @\"\".y·3 *any) (? bool)\n"
index 5150026aec71ba6ad7d075122b00870e9f750cf8..21cf8b894d1daae400a94629f424ab03aa5af182 100644 (file)
@@ -1451,6 +1451,7 @@ void      walkswitch(Node *sw);
  *     typecheck.c
  */
 int    islvalue(Node *n);
+int    samesafeexpr(Node *l, Node *r);
 Node*  typecheck(Node **np, int top);
 void   typechecklist(NodeList *l, int top);
 Node*  typecheckdef(Node *n);
index 4ed45283238eb7de01864992faf87e3b532cf410..947b4580ed7cd8c74e45c267f4078bced4f0b61d 100644 (file)
@@ -144,6 +144,72 @@ walkrange(Node *n)
                fatal("walkrange");
 
        case TARRAY:
+               // Lower n into runtime·memclr if possible, for
+               // fast zeroing of slices and arrays (issue 5373).
+               // Look for instances of
+               //
+               // for i := range a {
+               //      a[i] = zero
+               // }
+               //
+               // in which the evaluation of a is side-effect-free.
+               if(!debug['N'])
+               if(!flag_race)
+               if(v1 != N)
+               if(v2 == N)
+               if(n->nbody != nil)
+               if(n->nbody->n != N)    // at least one statement in body
+               if(n->nbody->next == nil) {     // at most one statement in body
+                       tmp = n->nbody->n;      // first statement of body
+                       if(tmp->op == OAS)
+                       if(tmp->left->op == OINDEX)
+                       if(samesafeexpr(tmp->left->left, a))
+                       if(samesafeexpr(tmp->left->right, v1))
+                       if(t->type->width > 0)
+                       if(iszero(tmp->right)) {
+                               // Convert to
+                               // if len(a) != 0 {
+                               //      hp = &a[0]
+                               //      hn = len(a)*sizeof(elem(a))
+                               //      memclr(hp, hn)
+                               //      i = len(a) - 1
+                               // }
+                               n->op = OIF;
+                               n->nbody = nil;
+                               n->ntest = nod(ONE, nod(OLEN, a, N), nodintconst(0));
+                               n->nincr = nil;
+
+                               // hp = &a[0]
+                               hp = temp(ptrto(types[TUINT8]));
+                               tmp = nod(OINDEX, a, nodintconst(0));
+                               tmp->bounded = 1;
+                               tmp = nod(OADDR, tmp, N);
+                               tmp = nod(OCONVNOP, tmp, N);
+                               n->nbody = list(n->nbody, nod(OAS, hp, tmp));
+
+                               // hn = len(a) * sizeof(elem(a))
+                               hn = temp(types[TUINTPTR]);
+                               tmp = nod(OLEN, a, N);
+                               tmp = nod(OMUL, tmp, nodintconst(t->type->width));
+                               tmp = conv(tmp, types[TUINTPTR]);
+                               n->nbody = list(n->nbody, nod(OAS, hn, tmp));
+
+                               // memclr(hp, hn)
+                               fn = mkcall("memclr", T, nil, hp, hn);
+                               n->nbody = list(n->nbody, fn);
+
+                               // i = len(a) - 1
+                               v1 = nod(OAS, v1, nod(OSUB, nod(OLEN, a, N), nodintconst(1)));
+                               n->nbody = list(n->nbody, v1);
+
+                               typecheck(&n->ntest, Erv);
+                               typechecklist(n->nbody, Etop);
+                               walkstmt(&n);
+                               lineno = lno;
+                               return;
+                       }
+               }
+
                // orderstmt arranged for a copy of the array/slice variable if needed.
                ha = a;
                hv1 = temp(types[TINT]);
index a294456007a3119cd7b24de25839e2e971acda06..463bb3a76da7896fe956709034db632976d82ecb 100644 (file)
@@ -160,6 +160,7 @@ func block()
 func makeslice(typ *byte, nel int64, cap int64) (ary []any)
 func growslice(typ *byte, old []any, n int64) (ary []any)
 func memmove(to *any, frm *any, length uintptr)
+func memclr(ptr *byte, length uintptr)
 
 func memequal(x, y *any, size uintptr) bool
 func memequal8(x, y *any) bool
index aa693aff7f914525f377c6bc8f28a205d0b6d8ad..8a3b486bd6686013229c3a0acb9ab570ee5ba662 100644 (file)
@@ -2831,7 +2831,7 @@ checkassignlist(NodeList *l)
 
 // Check whether l and r are the same side effect-free expression,
 // so that it is safe to reuse one instead of computing both.
-static int
+int
 samesafeexpr(Node *l, Node *r)
 {
        if(l->op != r->op || !eqtype(l->type, r->type))
index ffda4fe6c535e3ef13456e678e01e1c841071737..29c62cc37d0d76622a1e644c88f7ce7e0b52b0df 100644 (file)
@@ -162,6 +162,20 @@ func BenchmarkMemclr256(b *testing.B)   { bmMemclr(b, 256) }
 func BenchmarkMemclr4096(b *testing.B)  { bmMemclr(b, 4096) }
 func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) }
 
+func bmGoMemclr(b *testing.B, n int) {
+       x := make([]byte, n)
+       b.SetBytes(int64(n))
+       for i := 0; i < b.N; i++ {
+               for j := range x {
+                       x[j] = 0
+               }
+       }
+}
+func BenchmarkGoMemclr5(b *testing.B)   { bmGoMemclr(b, 5) }
+func BenchmarkGoMemclr16(b *testing.B)  { bmGoMemclr(b, 16) }
+func BenchmarkGoMemclr64(b *testing.B)  { bmGoMemclr(b, 64) }
+func BenchmarkGoMemclr256(b *testing.B) { bmGoMemclr(b, 256) }
+
 func BenchmarkClearFat8(b *testing.B) {
        for i := 0; i < b.N; i++ {
                var x [8 / 4]uint32
diff --git a/test/fixedbugs/issue5373.go b/test/fixedbugs/issue5373.go
new file mode 100644 (file)
index 0000000..17ce189
--- /dev/null
@@ -0,0 +1,71 @@
+// run
+
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Ensure that zeroing range loops have the requisite side-effects.
+
+package main
+
+import (
+       "fmt"
+       "os"
+)
+
+func check(n int) {
+       // When n == 0, i is untouched by the range loop.
+       // Picking an initial value of -1 for i makes the
+       // "want" calculation below correct in all cases.
+       i := -1
+       s := make([]byte, n)
+       for i = range s {
+               s[i] = 0
+       }
+       if want := n - 1; i != want {
+               fmt.Printf("index after range with side-effect = %d want %d\n", i, want)
+               os.Exit(1)
+       }
+
+       i = n + 1
+       // i is shadowed here, so its value should be unchanged.
+       for i := range s {
+               s[i] = 0
+       }
+       if want := n + 1; i != want {
+               fmt.Printf("index after range without side-effect = %d want %d\n", i, want)
+               os.Exit(1)
+       }
+
+       // Index variable whose evaluation has side-effects
+       var x int
+       f := func() int {
+               x++
+               return 0
+       }
+       var a [1]int
+       for a[f()] = range s {
+               s[a[f()]] = 0
+       }
+       if want := n * 2; x != want {
+               fmt.Printf("index function calls = %d want %d\n", x, want)
+               os.Exit(1)
+       }
+
+       // Range expression whose evaluation has side-effects
+       x = 0
+       b := [1][]byte{s}
+       for i := range b[f()] {
+               b[f()][i] = 0
+       }
+       if want := n + 1; x != n+1 {
+               fmt.Printf("range expr function calls = %d want %d\n", x, want)
+               os.Exit(1)
+       }
+}
+
+func main() {
+       check(0)
+       check(1)
+       check(15)
+}