}
}
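+// benchmarkAppendBytes times x = append(x, y...) for a y of the given length.
+// x is re-sliced to zero length each iteration, so the steady state measures
+// the append copy rather than allocation.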
+func benchmarkAppendBytes(b *testing.B, length int) {
+	b.StopTimer()
+	x := make([]byte, 0, N)
+	y := make([]byte, length)
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		x = x[0:0]
+		for j := 0; j < N; j++ {
+			x = append(x, y...)
+		}
+	}
+}
+
+func BenchmarkAppend1Byte(b *testing.B) {
+	benchmarkAppendBytes(b, 1)
+}
+
+func BenchmarkAppend4Bytes(b *testing.B) {
+	benchmarkAppendBytes(b, 4)
+}
+
+func BenchmarkAppend8Bytes(b *testing.B) {
+	benchmarkAppendBytes(b, 8)
+}
+
+func BenchmarkAppend16Bytes(b *testing.B) {
+	benchmarkAppendBytes(b, 16)
+}
+
+func BenchmarkAppend32Bytes(b *testing.B) {
+	benchmarkAppendBytes(b, 32)
+}
+
func BenchmarkAppendSpecialCase(b *testing.B) {
	b.StopTimer()
	x := make([]int, 0, N)
@@ ... @@
		t.Error("append failed: ", x[0], x[1])
	}
}
+
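+// TestAppendOverlap appends a slice onto an overlapping slice of the same
+// backing array whose destination pointer is above the source (p > q in
+// runtime·appendslice), so the runtime's overlap check must pick the right
+// copy direction.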
+func TestAppendOverlap(t *testing.T) {
+	x := []byte("1234")
+	x = append(x[1:], x...) // p > q in runtime·appendslice.
+	got := string(x)
+	want := "2341234"
+	if got != want {
+		t.Errorf("overlap failed: got %q want %q", got, want)
+	}
+}
@@ ... @@
	intgo m;
	uintptr w;
	void *pc;
+	uint8 *p, *q;

	m = x.len+y.len;
	w = t->elem->size;
@@ ... @@
			runtime·racewriterangepc(ret.array+ret.len*w, y.len*w, w, pc, runtime·appendslice);
	}
-	runtime·memmove(ret.array + ret.len*w, y.array, y.len*w);
+	// A very common case is appending bytes. Small appends can avoid the overhead of memmove.
+	// We can generalize a bit here, and just pick small-sized appends.
+	p = ret.array+ret.len*w;
+	q = y.array;
+	w *= y.len;
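+	// w is now the total number of bytes to copy.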
+	// TODO: make 16 an architecture-dependent constant.
+	if(w <= 16) { // 16 empirically tested as approximate crossover on amd64.
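+		// Copy forward when the destination precedes the source or the regions
+		// do not overlap; otherwise copy backward so that source bytes are read
+		// before they are overwritten.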
+		if(p <= q || w <= p-q) // No overlap.
+			while(w-- > 0)
+				*p++ = *q++;
+		else {
+			p += w;
+			q += w;
+			while(w-- > 0)
+				*--p = *--q;
+		}
+	} else {
+		runtime·memmove(p, q, w);
+	}
	ret.len += y.len;
	FLUSH(&ret);
}
@@ ... @@
	m = x.len+y.len;
	if(m < x.len)
-		runtime·throw("append: slice overflow");
+		runtime·throw("append: string overflow");
	if(m > x.cap)
		growslice1(t, x, m, &ret);