From: Josh Bleecher Snyder Date: Sun, 8 Apr 2018 00:40:57 +0000 (-0700) Subject: text/tabwriter: reduce allocations from tracking cells X-Git-Tag: go1.11beta1~825 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9ee7662cd68cf20571e9fd45688fb2a43f6b3051;p=gostls13.git text/tabwriter: reduce allocations from tracking cells The tabwriter tracks cells on a line-by-line basis. This can be memory-hungry when working with large input. This change adds two optimizations. First, when there's an existing cell slice for a line, don't overwrite it by appending. This helps when re-using a Writer, or when the output is broken into groups, e.g. by a blank line. We now re-use that existing cell slice. Second, we predict that the number of cells in a line will probably match those of the previous line, since tabwriter is most often used to format tables. This has a noticeable impact on cmd/objdump (#24725). It reduces allocated space by about 55%. It also speeds it up some. Using "benchcmd -n 10 Objdump go tool objdump `which go`": name old time/op new time/op delta ObjdumpCompile 9.03s ± 1% 8.51s ± 1% -5.81% (p=0.000 n=10+10) It might also imaginably speed up gofmt on some large machine-generated code. name old time/op new time/op delta Table/1x10/new-8 2.89µs ± 1% 2.39µs ± 1% -17.39% (p=0.000 n=13+14) Table/1x10/reuse-8 2.13µs ± 1% 1.29µs ± 2% -39.58% (p=0.000 n=14+15) Table/1x1000/new-8 203µs ± 0% 147µs ± 1% -27.45% (p=0.000 n=13+14) Table/1x1000/reuse-8 194µs ± 1% 113µs ± 2% -42.01% (p=0.000 n=14+15) Table/1x100000/new-8 33.1ms ± 1% 27.5ms ± 2% -17.08% (p=0.000 n=15+15) Table/1x100000/reuse-8 22.0ms ± 3% 11.8ms ± 1% -46.23% (p=0.000 n=14+12) Table/10x10/new-8 8.51µs ± 0% 6.52µs ± 1% -23.48% (p=0.000 n=13+15) Table/10x10/reuse-8 7.41µs ± 0% 4.59µs ± 3% -38.03% (p=0.000 n=14+15) Table/10x1000/new-8 749µs ± 0% 521µs ± 1% -30.39% (p=0.000 n=12+15) Table/10x1000/reuse-8 732µs ± 1% 448µs ± 2% -38.79% (p=0.000 n=15+14) Table/10x100000/new-8 102ms ± 2% 74ms ± 2% -28.05% (p=0.000 n=14+15) Table/10x100000/reuse-8 96.2ms ± 4% 55.4ms ± 3% -42.36% (p=0.000 n=15+15) Table/100x10/new-8 50.3µs ± 1% 43.3µs ± 1% -13.87% (p=0.000 n=14+15) Table/100x10/reuse-8 47.6µs ± 1% 36.1µs ± 1% -24.09% (p=0.000 n=14+14) Table/100x1000/new-8 5.17ms ± 1% 4.11ms ± 1% -20.40% (p=0.000 n=14+13) Table/100x1000/reuse-8 5.00ms ± 1% 3.73ms ± 1% -25.46% (p=0.000 n=14+14) Table/100x100000/new-8 654ms ± 2% 531ms ± 2% -18.86% (p=0.000 n=13+14) Table/100x100000/reuse-8 709ms ± 1% 505ms ± 2% -28.77% (p=0.000 n=12+15) Pyramid/10-8 4.22µs ± 1% 4.21µs ± 1% ~ (p=0.067 n=14+14) Pyramid/100-8 378µs ± 0% 378µs ± 0% +0.17% (p=0.022 n=13+13) Pyramid/1000-8 133ms ± 3% 132ms ± 3% ~ (p=0.148 n=15+15) Ragged/10-8 6.10µs ± 0% 5.16µs ± 0% -15.38% (p=0.000 n=14+15) Ragged/100-8 54.5µs ± 0% 43.8µs ± 0% -19.59% (p=0.000 n=14+15) Ragged/1000-8 532µs ± 0% 424µs ± 0% -20.25% (p=0.000 n=14+14) name old alloc/op new alloc/op delta Table/1x10/new-8 1.76kB ± 0% 1.52kB ± 0% -13.64% (p=0.000 n=15+15) Table/1x10/reuse-8 800B ± 0% 0B -100.00% (p=0.000 n=15+15) Table/1x1000/new-8 131kB ± 0% 99kB ± 0% -24.30% (p=0.000 n=15+15) Table/1x1000/reuse-8 80.0kB ± 0% 0.0kB ± 0% -99.99% (p=0.000 n=15+15) Table/1x100000/new-8 23.1MB ± 0% 19.9MB ± 0% -13.85% (p=0.000 n=15+15) Table/1x100000/reuse-8 8.30MB ± 0% 0.20MB ± 0% -97.60% (p=0.000 n=13+12) Table/10x10/new-8 8.94kB ± 0% 5.06kB ± 0% -43.47% (p=0.000 n=15+15) Table/10x10/reuse-8 7.52kB ± 0% 0.00kB -100.00% (p=0.000 n=15+15) Table/10x1000/new-8 850kB ± 0% 387kB ± 0% -54.50% (p=0.000 n=13+15) Table/10x1000/reuse-8 752kB ± 0% 0kB ± 0% -99.98% (p=0.000 n=13+15) Table/10x100000/new-8 95.7MB ± 0% 49.3MB ± 0% -48.50% (p=0.000 n=14+15) Table/10x100000/reuse-8 76.2MB ± 0% 2.5MB ± 0% -96.77% (p=0.000 n=13+15) Table/100x10/new-8 66.3kB ± 0% 38.0kB ± 0% -42.65% (p=0.000 n=15+15) Table/100x10/reuse-8 61.3kB ± 0% 0.0kB -100.00% (p=0.000 n=15+15) Table/100x1000/new-8 6.69MB ± 0% 3.25MB ± 0% -51.37% (p=0.000 n=15+15) Table/100x1000/reuse-8 6.13MB ± 0% 0.01MB ± 0% -99.89% (p=0.000 n=15+15) Table/100x100000/new-8 684MB ± 0% 340MB ± 0% -50.29% (p=0.000 n=14+15) Table/100x100000/reuse-8 648MB ± 0% 170MB ± 0% -73.78% (p=0.000 n=14+13) Pyramid/10-8 4.40kB ± 0% 4.40kB ± 0% ~ (all equal) Pyramid/100-8 652kB ± 0% 652kB ± 0% ~ (p=0.715 n=15+15) Pyramid/1000-8 96.7MB ± 0% 96.7MB ± 0% ~ (p=0.084 n=15+14) Ragged/10-8 5.17kB ± 0% 4.51kB ± 0% -12.69% (p=0.000 n=15+15) Ragged/100-8 50.2kB ± 0% 41.1kB ± 0% -18.04% (p=0.000 n=15+15) Ragged/1000-8 492kB ± 0% 401kB ± 0% -18.61% (p=0.000 n=15+15) name old allocs/op new allocs/op delta Table/1x10/new-8 29.0 ± 0% 21.0 ± 0% -27.59% (p=0.000 n=15+15) Table/1x10/reuse-8 20.0 ± 0% 0.0 -100.00% (p=0.000 n=15+15) Table/1x1000/new-8 2.02k ± 0% 1.02k ± 0% -49.38% (p=0.000 n=15+15) Table/1x1000/reuse-8 2.00k ± 0% 0.00k -100.00% (p=0.000 n=15+15) Table/1x100000/new-8 200k ± 0% 100k ± 0% -49.98% (p=0.000 n=15+15) Table/1x100000/reuse-8 200k ± 0% 1k ± 0% -99.50% (p=0.000 n=14+15) Table/10x10/new-8 66.0 ± 0% 31.0 ± 0% -53.03% (p=0.000 n=15+15) Table/10x10/reuse-8 50.0 ± 0% 0.0 -100.00% (p=0.000 n=15+15) Table/10x1000/new-8 5.03k ± 0% 1.04k ± 0% -79.36% (p=0.000 n=15+15) Table/10x1000/reuse-8 5.00k ± 0% 0.00k -100.00% (p=0.000 n=15+15) Table/10x100000/new-8 500k ± 0% 100k ± 0% -79.99% (p=0.000 n=15+15) Table/10x100000/reuse-8 500k ± 0% 5k ± 0% -99.00% (p=0.000 n=15+15) Table/100x10/new-8 102 ± 0% 40 ± 0% -60.78% (p=0.000 n=15+15) Table/100x10/reuse-8 80.0 ± 0% 0.0 -100.00% (p=0.000 n=15+15) Table/100x1000/new-8 8.04k ± 0% 1.05k ± 0% -86.91% (p=0.000 n=15+15) Table/100x1000/reuse-8 8.00k ± 0% 0.00k ± 0% -99.98% (p=0.000 n=15+15) Table/100x100000/new-8 800k ± 0% 100k ± 0% -87.49% (p=0.000 n=15+12) Table/100x100000/reuse-8 800k ± 0% 50k ± 0% -93.74% (p=0.000 n=14+13) Pyramid/10-8 20.0 ± 0% 20.0 ± 0% ~ (all equal) Pyramid/100-8 50.0 ± 0% 50.0 ± 0% ~ (all equal) Pyramid/1000-8 109 ± 0% 109 ± 0% ~ (all equal) Ragged/10-8 54.0 ± 0% 34.0 ± 0% -37.04% (p=0.000 n=15+15) Ragged/100-8 422 ± 0% 188 ± 0% -55.45% (p=0.000 n=15+15) Ragged/1000-8 4.03k ± 0% 1.66k ± 0% -58.80% (p=0.000 n=15+15) Change-Id: I0c0a392b02d5148a0a4b8ad4eaf98fa343980962 Reviewed-on: https://go-review.googlesource.com/106979 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- diff --git a/src/text/tabwriter/tabwriter.go b/src/text/tabwriter/tabwriter.go index ecda758ab6..d2f38be26d 100644 --- a/src/text/tabwriter/tabwriter.go +++ b/src/text/tabwriter/tabwriter.go @@ -106,7 +106,27 @@ type Writer struct { widths []int // list of column widths in runes - re-used during formatting } -func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) } +func (b *Writer) addLine() { + // Grow slice instead of appending, + // as that gives us an opportunity + // to re-use an existing []cell. + if n := len(b.lines) + 1; n <= cap(b.lines) { + b.lines = b.lines[:n] + b.lines[n-1] = b.lines[n-1][:0] + } else { + b.lines = append(b.lines, nil) + } + + // The previous line is probably a good indicator + // of how many cells the current line will have. + // If the current line's capacity is smaller than that, + // abandon it and make a new one. + if n := len(b.lines); n >= 2 { + if prev := len(b.lines[n-2]); prev > cap(b.lines[n-1]) { + b.lines[n-1] = make([]cell, 0, prev) + } + } +} // Reset the current state. func (b *Writer) reset() { diff --git a/src/text/tabwriter/tabwriter_test.go b/src/text/tabwriter/tabwriter_test.go index 9d3111e2c2..ebcad5e34f 100644 --- a/src/text/tabwriter/tabwriter_test.go +++ b/src/text/tabwriter/tabwriter_test.go @@ -5,7 +5,10 @@ package tabwriter_test import ( + "bytes" + "fmt" "io" + "io/ioutil" "testing" . "text/tabwriter" ) @@ -650,3 +653,79 @@ func TestPanicDuringWrite(t *testing.T) { io.WriteString(w, "a\n\n") // the second \n triggers a call to w.Write and thus a panic t.Errorf("failed to panic during Write") } + +func BenchmarkTable(b *testing.B) { + for _, w := range [...]int{1, 10, 100} { + // Build a line with w cells. + line := bytes.Repeat([]byte("a\t"), w) + line = append(line, '\n') + for _, h := range [...]int{10, 1000, 100000} { + b.Run(fmt.Sprintf("%dx%d", w, h), func(b *testing.B) { + b.Run("new", func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings + // Write the line h times. + for j := 0; j < h; j++ { + w.Write(line) + } + w.Flush() + } + }) + + b.Run("reuse", func(b *testing.B) { + b.ReportAllocs() + w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings + for i := 0; i < b.N; i++ { + // Write the line h times. + for j := 0; j < h; j++ { + w.Write(line) + } + w.Flush() + } + }) + }) + } + } +} + +func BenchmarkPyramid(b *testing.B) { + for _, x := range [...]int{10, 100, 1000} { + // Build a line with x cells. + line := bytes.Repeat([]byte("a\t"), x) + line = append(line, '\n') + b.Run(fmt.Sprintf("%d", x), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings + // Write increasing prefixes of that line. + for j := 0; j < x; j++ { + w.Write(line[:j*2]) + } + w.Flush() + } + }) + } +} + +func BenchmarkRagged(b *testing.B) { + var lines [8][]byte + for i, w := range [8]int{6, 2, 9, 5, 5, 7, 3, 8} { + // Build a line with x cells. + lines[i] = bytes.Repeat([]byte("a\t"), w) + lines[i] = append(lines[i], '\n') + } + for _, h := range [...]int{10, 100, 1000} { + b.Run(fmt.Sprintf("%d", h), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings + // Write the lines in turn h times. + for j := 0; j < h; j++ { + w.Write(lines[j%len(lines)]) + } + w.Flush() + } + }) + } +}