From 8cfed59941655f6a77af065f0196dff6450f52a7 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Fri, 1 Mar 2013 14:31:26 -0800 Subject: [PATCH] runtime: special-case small byte appends. Update #3679. BenchmarkAppend1Byte 484 199 -58.88% BenchmarkAppend4Bytes 829 286 -65.50% BenchmarkAppend8Bytes 484 365 -24.59% BenchmarkAppend16Bytes 484 498 +2.89% BenchmarkAppend32Bytes 486 484 -0.41% R=iant, dave, rsc CC=golang-dev https://golang.org/cl/7443047 --- src/pkg/runtime/append_test.go | 43 ++++++++++++++++++++++++++++++++++ src/pkg/runtime/slice.c | 23 ++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/pkg/runtime/append_test.go b/src/pkg/runtime/append_test.go index b8552224e5..e9fc4a7901 100644 --- a/src/pkg/runtime/append_test.go +++ b/src/pkg/runtime/append_test.go @@ -19,6 +19,39 @@ func BenchmarkAppend(b *testing.B) { } } +func benchmarkAppendBytes(b *testing.B, length int) { + b.StopTimer() + x := make([]byte, 0, N) + y := make([]byte, length) + b.StartTimer() + for i := 0; i < b.N; i++ { + x = x[0:0] + for j := 0; j < N; j++ { + x = append(x, y...) + } + } +} + +func BenchmarkAppend1Byte(b *testing.B) { + benchmarkAppendBytes(b, 1) +} + +func BenchmarkAppend4Bytes(b *testing.B) { + benchmarkAppendBytes(b, 4) +} + +func BenchmarkAppend8Bytes(b *testing.B) { + benchmarkAppendBytes(b, 8) +} + +func BenchmarkAppend16Bytes(b *testing.B) { + benchmarkAppendBytes(b, 16) +} + +func BenchmarkAppend32Bytes(b *testing.B) { + benchmarkAppendBytes(b, 32) +} + func BenchmarkAppendSpecialCase(b *testing.B) { b.StopTimer() x := make([]int, 0, N) @@ -50,3 +83,13 @@ func TestSideEffectOrder(t *testing.T) { t.Error("append failed: ", x[0], x[1]) } } + +func TestAppendOverlap(t *testing.T) { + x := []byte("1234") + x = append(x[1:], x...) // p > q in runtime·appendslice. + got := string(x) + want := "2341234" + if got != want { + t.Errorf("overlap failed: got %q want %q", got, want) + } +} diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c index 1678d5df8d..b517c3aa33 100644 --- a/src/pkg/runtime/slice.c +++ b/src/pkg/runtime/slice.c @@ -79,6 +79,7 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret) intgo m; uintptr w; void *pc; + uint8 *p, *q; m = x.len+y.len; w = t->elem->size; @@ -104,7 +105,25 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret) runtime·racewriterangepc(ret.array+ret.len*w, y.len*w, w, pc, runtime·appendslice); } - runtime·memmove(ret.array + ret.len*w, y.array, y.len*w); + // A very common case is appending bytes. Small appends can avoid the overhead of memmove. + // We can generalize a bit here, and just pick small-sized appends. + p = ret.array+ret.len*w; + q = y.array; + w *= y.len; + // TODO: make 16 an architecture-dependent constant. + if(w <= 16) { // 16 empirically tested as approximate crossover on amd64. + if(p <= q || w <= p-q) // No overlap. + while(w-- > 0) + *p++ = *q++; + else { + p += w; + q += w; + while(w-- > 0) + *--p = *--q; + } + } else { + runtime·memmove(p, q, w); + } ret.len += y.len; FLUSH(&ret); } @@ -121,7 +140,7 @@ runtime·appendstr(SliceType *t, Slice x, String y, Slice ret) m = x.len+y.len; if(m < x.len) - runtime·throw("append: slice overflow"); + runtime·throw("append: string overflow"); if(m > x.cap) growslice1(t, x, m, &ret); -- 2.48.1