]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: smarter slice grow
authorDmitriy Vyukov <dvyukov@google.com>
Mon, 27 Jan 2014 11:11:12 +0000 (15:11 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Mon, 27 Jan 2014 11:11:12 +0000 (15:11 +0400)
When growing slice take into account size of the allocated memory block.
Also apply the same optimization to string->[]byte conversion.
Fixes #6307.

benchmark                    old ns/op    new ns/op    delta
BenchmarkAppendGrowByte        4541036      4434108   -2.35%
BenchmarkAppendGrowString     59885673     44813604  -25.17%

LGTM=khr
R=khr
CC=golang-codereviews, iant, rsc
https://golang.org/cl/53340044

src/pkg/runtime/append_test.go
src/pkg/runtime/malloc.h
src/pkg/runtime/msize.c
src/pkg/runtime/slice.c
src/pkg/runtime/string.goc

index 937c8259fdd8503f31394fa76cb8031b64d9e1e1..a67dc9b494829fe68b529485d8a1b5e03ca4e528 100644 (file)
@@ -19,6 +19,25 @@ func BenchmarkAppend(b *testing.B) {
        }
 }
 
+func BenchmarkAppendGrowByte(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               var x []byte
+               for j := 0; j < 1<<20; j++ {
+                       x = append(x, byte(j))
+               }
+       }
+}
+
+func BenchmarkAppendGrowString(b *testing.B) {
+       var s string
+       for i := 0; i < b.N; i++ {
+               var x []string
+               for j := 0; j < 1<<20; j++ {
+                       x = append(x, s)
+               }
+       }
+}
+
 func benchmarkAppendBytes(b *testing.B, length int) {
        b.StopTimer()
        x := make([]byte, 0, N)
index 8122b4b0b89ff9b7659953acfcbbd63705d2c522..4146299223bed7915097978d611e026807865681 100644 (file)
@@ -273,6 +273,7 @@ extern MStats mstats;
 //     making new objects in class i
 
 int32  runtime·SizeToClass(int32);
+uintptr        runtime·roundupsize(uintptr);
 extern int32   runtime·class_to_size[NumSizeClasses];
 extern int32   runtime·class_to_allocnpages[NumSizeClasses];
 extern int8    runtime·size_to_class8[1024/8 + 1];
index 85088fdf465e65228849739397b3fab01acbd1e1..63d5ef490e8087ab9e7676e0cb4f471d1cc2c4b7 100644 (file)
@@ -162,3 +162,18 @@ dump:
        }
        runtime·throw("InitSizes failed");
 }
+
+// Returns size of the memory block that mallocgc will allocate if you ask for the size.
+uintptr
+runtime·roundupsize(uintptr size)
+{
+       if(size < MaxSmallSize) {
+               if(size <= 1024-8)
+                       return runtime·class_to_size[runtime·size_to_class8[(size+7)>>3]];
+               else
+                       return runtime·class_to_size[runtime·size_to_class128[(size-1024+127) >> 7]];
+       }
+       if(size + PageSize < size)
+               return size;
+       return ROUND(size, PageSize);
+}
index ef8ab7fe0a776c150dbdb3de27608f70901dd64f..c3b240bc83250af259c41c59a7834e762b4c7539 100644 (file)
@@ -8,6 +8,7 @@
 #include "typekind.h"
 #include "malloc.h"
 #include "race.h"
+#include "stack.h"
 #include "../../cmd/ld/textflag.h"
 
 enum
@@ -92,26 +93,53 @@ runtime·growslice(SliceType *t, Slice old, int64 n, Slice ret)
 static void
 growslice1(SliceType *t, Slice x, intgo newcap, Slice *ret)
 {
-       intgo m;
+       intgo newcap1;
+       uintptr capmem, lenmem;
+       int32 flag;
+       Type *typ;
+
+       typ = t->elem;
+       if(typ->size == 0) {
+               *ret = x;
+               ret->cap = newcap;
+               return;
+       }
 
-       m = x.cap;
+       newcap1 = x.cap;
        
        // Using newcap directly for m+m < newcap handles
        // both the case where m == 0 and also the case where
        // m+m/4 wraps around, in which case the loop
        // below might never terminate.
-       if(m+m < newcap)
-               m = newcap;
+       if(newcap1+newcap1 < newcap)
+               newcap1 = newcap;
        else {
                do {
                        if(x.len < 1024)
-                               m += m;
+                               newcap1 += newcap1;
                        else
-                               m += m/4;
-               } while(m < newcap);
+                               newcap1 += newcap1/4;
+               } while(newcap1 < newcap);
        }
-       makeslice1(t, x.len, m, ret);
-       runtime·memmove(ret->array, x.array, ret->len * t->elem->size);
+
+       if(newcap1 > MaxMem/typ->size)
+               runtime·panicstring("growslice: cap out of range");
+       capmem = runtime·roundupsize(newcap1*typ->size);
+       flag = FlagNoZero;
+       if(typ->kind&KindNoPointers)
+               flag |= FlagNoScan;
+       // Here we allocate with FlagNoZero but potentially w/o FlagNoScan,
+       // GC must not see this blocks until memclr below.
+       m->locks++;
+       ret->array = runtime·mallocgc(capmem, (uintptr)typ|TypeInfo_Array, flag);
+       ret->len = x.len;
+       ret->cap = capmem/typ->size;
+       lenmem = x.len*typ->size;
+       runtime·memmove(ret->array, x.array, lenmem);
+       runtime·memclr(ret->array+lenmem, capmem-lenmem);
+       m->locks--;
+       if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
+               g->stackguard0 = StackPreempt;
 }
 
 // copy(to any, fr any, wid uintptr) int
index 8eff05a8438bedf10ce12c480465b976bda128e2..407188cfe6477db0a66f8c943cbff690540d6cab 100644 (file)
@@ -78,6 +78,7 @@ runtime·gostringn(byte *str, intgo l)
        return s;
 }
 
+// used by cmd/cgo
 Slice
 runtime·gobytes(byte *p, intgo n)
 {
@@ -278,10 +279,15 @@ func slicebytetostring(b Slice) (s String) {
 }
 
 func stringtoslicebyte(s String) (b Slice) {
-       b.array = runtime·mallocgc(s.len, 0, FlagNoScan|FlagNoZero);
+       uintptr cap;
+
+       cap = runtime·roundupsize(s.len);
+       b.array = runtime·mallocgc(cap, 0, FlagNoScan|FlagNoZero);
        b.len = s.len;
-       b.cap = s.len;
+       b.cap = cap;
        runtime·memmove(b.array, s.str, s.len);
+       if(cap != b.len)
+               runtime·memclr(b.array+b.len, cap-b.len);
 }
 
 func slicerunetostring(b Slice) (s String) {
@@ -316,6 +322,7 @@ func stringtoslicerune(s String) (b Slice) {
        intgo n;
        int32 dum, *r;
        uint8 *p, *ep;
+       uintptr mem;
 
        // two passes.
        // unlike slicerunetostring, no race because strings are immutable.
@@ -327,13 +334,16 @@ func stringtoslicerune(s String) (b Slice) {
                n++;
        }
 
-       b.array = runtime·mallocgc(n*sizeof(r[0]), 0, FlagNoScan|FlagNoZero);
+       mem = runtime·roundupsize(n*sizeof(r[0]));
+       b.array = runtime·mallocgc(mem, 0, FlagNoScan|FlagNoZero);
        b.len = n;
-       b.cap = n;
+       b.cap = mem/sizeof(r[0]);
        p = s.str;
        r = (int32*)b.array;
        while(p < ep)
                p += runtime·charntorune(r++, p, ep-p);
+       if(b.cap > b.len)
+               runtime·memclr(b.array+b.len*sizeof(r[0]), (b.cap-b.len)*sizeof(r[0]));
 }
 
 enum