From: Carlo Alberto Ferraris Date: Fri, 24 Feb 2017 23:48:00 +0000 (+0900) Subject: bytes: make bytes.Buffer cache-friendly X-Git-Tag: go1.9beta1~1405 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=55310403ddf051634fa398b4c9e6013d530753f5;p=gostls13.git bytes: make bytes.Buffer cache-friendly During benchmark of an internal tool we found out that (*Buffer).Reset() was surprisingly showing up in CPU profiles. This CL contains two related changes aimed at speeding up Reset(): 1. Create a fast path for Truncate(0) by moving the logic to Reset() (this makes Reset() a simple leaf func that gets inlined since it gets compiled to 3 MOVx instructions). Accordingly change calls in the rest of the Buffer methods to call Reset() instead of Truncate(0). 2. Reorder the fields in the Buffer struct so that frequently accessed fields are packed together (buf, off, lastRead). This also make them likely to be in the same cacheline. Ideally it would be advisable to have Buffer{} cacheline-aligned, but I couldn't find a way to do this without changing the size of the bootstrap array (but this will cause some regressions, because it will make duffcopy show up in CPU profiles where it wasn't showing up before). go1 benchmarks are not really affected, but some other benchmarks that exercise Buffer more show improvements: name old time/op new time/op delta BinaryTree17-4 2.46s ± 9% 2.43s ± 3% ~ (p=0.982 n=14+14) Fannkuch11-4 2.98s ± 1% 2.90s ± 1% -2.58% (p=0.000 n=15+14) FmtFprintfEmpty-4 45.2ns ± 1% 45.2ns ± 1% ~ (p=0.494 n=14+15) FmtFprintfString-4 76.8ns ± 1% 83.1ns ± 2% +8.23% (p=0.000 n=10+15) FmtFprintfInt-4 78.0ns ± 2% 74.6ns ± 1% -4.46% (p=0.000 n=15+15) FmtFprintfIntInt-4 113ns ± 1% 109ns ± 2% -2.91% (p=0.000 n=13+15) FmtFprintfPrefixedInt-4 152ns ± 2% 143ns ± 2% -6.04% (p=0.000 n=15+14) FmtFprintfFloat-4 224ns ± 1% 222ns ± 2% -1.08% (p=0.001 n=15+14) FmtManyArgs-4 464ns ± 2% 463ns ± 2% ~ (p=0.303 n=14+15) GobDecode-4 6.25ms ± 2% 6.32ms ± 3% +1.20% (p=0.002 n=14+14) GobEncode-4 5.41ms ± 2% 5.41ms ± 2% ~ (p=0.967 n=15+15) Gzip-4 215ms ± 2% 218ms ± 2% +1.35% (p=0.002 n=15+15) Gunzip-4 34.3ms ± 2% 34.2ms ± 2% ~ (p=0.539 n=15+15) HTTPClientServer-4 76.4µs ± 2% 75.4µs ± 1% -1.31% (p=0.000 n=15+15) JSONEncode-4 14.7ms ± 2% 14.6ms ± 3% ~ (p=0.094 n=14+14) JSONDecode-4 48.0ms ± 1% 48.5ms ± 1% +0.92% (p=0.001 n=14+12) Mandelbrot200-4 4.04ms ± 2% 4.06ms ± 1% ~ (p=0.108 n=15+13) GoParse-4 2.99ms ± 2% 3.00ms ± 1% ~ (p=0.130 n=15+13) RegexpMatchEasy0_32-4 78.3ns ± 1% 79.5ns ± 1% +1.51% (p=0.000 n=15+14) RegexpMatchEasy0_1K-4 185ns ± 1% 186ns ± 1% +0.76% (p=0.005 n=15+15) RegexpMatchEasy1_32-4 79.0ns ± 2% 76.7ns ± 1% -2.87% (p=0.000 n=14+15) name old speed new speed delta GobDecode-4 123MB/s ± 2% 121MB/s ± 3% -1.18% (p=0.002 n=14+14) GobEncode-4 142MB/s ± 2% 142MB/s ± 1% ~ (p=0.959 n=15+15) Gzip-4 90.3MB/s ± 2% 89.1MB/s ± 2% -1.34% (p=0.002 n=15+15) Gunzip-4 565MB/s ± 2% 567MB/s ± 2% ~ (p=0.539 n=15+15) JSONEncode-4 132MB/s ± 2% 133MB/s ± 3% ~ (p=0.091 n=14+14) JSONDecode-4 40.4MB/s ± 1% 40.0MB/s ± 1% -0.92% (p=0.001 n=14+12) GoParse-4 19.4MB/s ± 2% 19.3MB/s ± 1% ~ (p=0.121 n=15+13) RegexpMatchEasy0_32-4 409MB/s ± 1% 403MB/s ± 1% -1.47% (p=0.000 n=15+14) RegexpMatchEasy0_1K-4 5.53GB/s ± 1% 5.49GB/s ± 1% -0.86% (p=0.002 n=15+15) RegexpMatchEasy1_32-4 405MB/s ± 2% 417MB/s ± 1% +2.94% (p=0.000 n=14+15) name old time/op new time/op delta PoolsSingle1K-4 34.9ns ± 2% 30.4ns ± 4% -12.80% (p=0.000 n=15+15) PoolsSingle64K-4 36.9ns ± 1% 34.4ns ± 4% -6.72% (p=0.000 n=14+15) PoolsRandomSmall-4 34.8ns ± 3% 29.5ns ± 1% -15.19% (p=0.000 n=15+14) PoolsRandomLarge-4 38.6ns ± 1% 34.3ns ± 3% -11.17% (p=0.000 n=14+15) PoolSingle1K-4 26.1ns ± 1% 21.2ns ± 2% -18.59% (p=0.000 n=15+14) PoolSingle64K-4 26.7ns ± 2% 21.5ns ± 2% -19.72% (p=0.000 n=15+15) MakeSingle1K-4 24.2ns ± 2% 24.3ns ± 3% ~ (p=0.132 n=13+15) MakeSingle64K-4 6.76µs ± 1% 6.96µs ± 5% +2.94% (p=0.002 n=13+13) MakeRandomSmall-4 531ns ± 4% 538ns ± 5% ~ (p=0.066 n=14+15) MakeRandomLarge-4 152µs ± 0% 152µs ± 1% -0.31% (p=0.001 n=14+13) Change-Id: I86d7d9d2cac65335baf62214fbb35ba0fd8f9528 Reviewed-on: https://go-review.googlesource.com/37416 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- diff --git a/src/bytes/buffer.go b/src/bytes/buffer.go index 196419dc3d..c821fa7c54 100644 --- a/src/bytes/buffer.go +++ b/src/bytes/buffer.go @@ -15,10 +15,15 @@ import ( // A Buffer is a variable-sized buffer of bytes with Read and Write methods. // The zero value for Buffer is an empty buffer ready to use. type Buffer struct { - buf []byte // contents are the bytes buf[off : len(buf)] - off int // read at &buf[off], write at &buf[len(buf)] - bootstrap [64]byte // memory to hold first slice; helps small buffers avoid allocation. - lastRead readOp // last read operation, so that Unread* can work correctly. + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + lastRead readOp // last read operation, so that Unread* can work correctly. + // FIXME: lastRead can fit in a single byte + + // memory to hold first slice; helps small buffers avoid allocation. + // FIXME: it would be advisable to align Buffer to cachelines to avoid false + // sharing. + bootstrap [64]byte } // The readOp constants describe the last action performed on @@ -68,13 +73,13 @@ func (b *Buffer) Cap() int { return cap(b.buf) } // but continues to use the same allocated storage. // It panics if n is negative or greater than the length of the buffer. func (b *Buffer) Truncate(n int) { + if n == 0 { + b.Reset() + return + } b.lastRead = opInvalid - switch { - case n < 0 || n > b.Len(): + if n < 0 || n > b.Len() { panic("bytes.Buffer: truncation out of range") - case n == 0: - // Reuse buffer space. - b.off = 0 } b.buf = b.buf[0 : b.off+n] } @@ -82,7 +87,11 @@ func (b *Buffer) Truncate(n int) { // Reset resets the buffer to be empty, // but it retains the underlying storage for use by future writes. // Reset is the same as Truncate(0). -func (b *Buffer) Reset() { b.Truncate(0) } +func (b *Buffer) Reset() { + b.buf = b.buf[:0] + b.off = 0 + b.lastRead = opInvalid +} // grow grows the buffer to guarantee space for n more bytes. // It returns the index where bytes should be written. @@ -91,7 +100,7 @@ func (b *Buffer) grow(n int) int { m := b.Len() // If buffer is empty, reset to recover space. if m == 0 && b.off != 0 { - b.Truncate(0) + b.Reset() } if len(b.buf)+n > cap(b.buf) { var buf []byte @@ -161,7 +170,7 @@ func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) { b.lastRead = opInvalid // If buffer is empty, reset to recover space. if b.off >= len(b.buf) { - b.Truncate(0) + b.Reset() } for { if free := cap(b.buf) - len(b.buf); free < MinRead { @@ -225,7 +234,7 @@ func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) { } } // Buffer is now empty; reset. - b.Truncate(0) + b.Reset() return } @@ -264,7 +273,7 @@ func (b *Buffer) Read(p []byte) (n int, err error) { b.lastRead = opInvalid if b.off >= len(b.buf) { // Buffer is empty, reset to recover space. - b.Truncate(0) + b.Reset() if len(p) == 0 { return } @@ -302,7 +311,7 @@ func (b *Buffer) ReadByte() (byte, error) { b.lastRead = opInvalid if b.off >= len(b.buf) { // Buffer is empty, reset to recover space. - b.Truncate(0) + b.Reset() return 0, io.EOF } c := b.buf[b.off] @@ -320,7 +329,7 @@ func (b *Buffer) ReadRune() (r rune, size int, err error) { b.lastRead = opInvalid if b.off >= len(b.buf) { // Buffer is empty, reset to recover space. - b.Truncate(0) + b.Reset() return 0, 0, io.EOF } c := b.buf[b.off]