From ffd0d02d099d07db2e98dc6e91323cc51a57c124 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Wed, 9 May 2012 08:57:54 +1000 Subject: [PATCH] compress/flate: benchmark some English text, not just the digits of e. The testdata/e.txt input is repeated on the longer benchmarks, but the length of that data is less than flate's window size, so the numbers are essentially measuring the performance of a trivial compression. A follow-up CL will add more data to testdata/e.txt. Sample output on my laptop (linux, amd64): BenchmarkDecodeDigitsSpeed1e4 5000 603153 ns/op 16.58 MB/s BenchmarkDecodeDigitsSpeed1e5 1000 1465602 ns/op 68.23 MB/s BenchmarkDecodeDigitsSpeed1e6 200 8036050 ns/op 124.44 MB/s BenchmarkDecodeDigitsDefault1e4 5000 581796 ns/op 17.19 MB/s BenchmarkDecodeDigitsDefault1e5 2000 846653 ns/op 118.11 MB/s BenchmarkDecodeDigitsDefault1e6 500 3385782 ns/op 295.35 MB/s BenchmarkDecodeDigitsCompress1e4 5000 581180 ns/op 17.21 MB/s BenchmarkDecodeDigitsCompress1e5 2000 846209 ns/op 118.17 MB/s BenchmarkDecodeDigitsCompress1e6 500 3386174 ns/op 295.32 MB/s BenchmarkDecodeTwainSpeed1e4 5000 643563 ns/op 15.54 MB/s BenchmarkDecodeTwainSpeed1e5 500 5418408 ns/op 18.46 MB/s BenchmarkDecodeTwainSpeed1e6 50 52277520 ns/op 19.13 MB/s BenchmarkDecodeTwainDefault1e4 5000 583551 ns/op 17.14 MB/s BenchmarkDecodeTwainDefault1e5 500 4443428 ns/op 22.51 MB/s BenchmarkDecodeTwainDefault1e6 50 41862080 ns/op 23.89 MB/s BenchmarkDecodeTwainCompress1e4 5000 583490 ns/op 17.14 MB/s BenchmarkDecodeTwainCompress1e5 500 4426356 ns/op 22.59 MB/s BenchmarkDecodeTwainCompress1e6 50 41657940 ns/op 24.01 MB/s BenchmarkEncodeDigitsSpeed1e4 2000 1230907 ns/op 8.12 MB/s BenchmarkEncodeDigitsSpeed1e5 1000 2319129 ns/op 43.12 MB/s BenchmarkEncodeDigitsSpeed1e6 100 12378950 ns/op 80.78 MB/s BenchmarkEncodeDigitsDefault1e4 1000 1597865 ns/op 6.26 MB/s BenchmarkEncodeDigitsDefault1e5 500 3163458 ns/op 31.61 MB/s BenchmarkEncodeDigitsDefault1e6 100 18770240 ns/op 53.28 MB/s BenchmarkEncodeDigitsCompress1e4 1000 1603461 ns/op 6.24 MB/s BenchmarkEncodeDigitsCompress1e5 500 3168766 ns/op 31.56 MB/s BenchmarkEncodeDigitsCompress1e6 100 18855830 ns/op 53.03 MB/s BenchmarkEncodeTwainSpeed1e4 1000 1338049 ns/op 7.47 MB/s BenchmarkEncodeTwainSpeed1e5 500 7341622 ns/op 13.62 MB/s BenchmarkEncodeTwainSpeed1e6 50 67484600 ns/op 14.82 MB/s BenchmarkEncodeTwainDefault1e4 1000 1778399 ns/op 5.62 MB/s BenchmarkEncodeTwainDefault1e5 100 23261810 ns/op 4.30 MB/s BenchmarkEncodeTwainDefault1e6 10 243533600 ns/op 4.11 MB/s BenchmarkEncodeTwainCompress1e4 1000 1795469 ns/op 5.57 MB/s BenchmarkEncodeTwainCompress1e5 50 29447140 ns/op 3.40 MB/s BenchmarkEncodeTwainCompress1e6 5 321686800 ns/op 3.11 MB/s ok compress/flate 89.246s R=rsc, r CC=golang-dev https://golang.org/cl/6195055 --- src/pkg/compress/flate/reader_test.go | 92 ++++++++++++++++----------- src/pkg/compress/flate/writer_test.go | 64 ++++++++----------- 2 files changed, 81 insertions(+), 75 deletions(-) diff --git a/src/pkg/compress/flate/reader_test.go b/src/pkg/compress/flate/reader_test.go index 560d7e87bb..d6d943bd9d 100644 --- a/src/pkg/compress/flate/reader_test.go +++ b/src/pkg/compress/flate/reader_test.go @@ -12,20 +12,47 @@ import ( "testing" ) -func benchmarkDecoder(b *testing.B, level, n int) { +const ( + digits = iota + twain +) + +var testfiles = []string{ + // Digits is the digits of the irrational number e. Its decimal representation + // does not repeat, but there are only 10 posible digits, so it should be + // reasonably compressible. + // + // TODO(nigeltao): e.txt is only 10K long, so when benchmarking 100K or 1000K + // of input, the digits are just repeated from the beginning, and flate can + // trivially compress this as a length/distance copy operation. Thus, + // BenchmarkDecodeDigitsXxx1e6 is essentially just measuring the speed of the + // forwardCopy implementation, but isn't particularly representative of real + // usage. The TODO is to replace e.txt with 100K digits, not just 10K digits, + // since that's larger than the windowSize 1<<15 (= 32768). + digits: "../testdata/e.txt", + // Twain is Project Gutenberg's edition of Mark Twain's classic English novel. + twain: "../testdata/Mark.Twain-Tom.Sawyer.txt", +} + +func benchmarkDecode(b *testing.B, testfile, level, n int) { b.StopTimer() b.SetBytes(int64(n)) - buf0, err := ioutil.ReadFile("../testdata/e.txt") + buf0, err := ioutil.ReadFile(testfiles[testfile]) if err != nil { b.Fatal(err) } - buf0 = buf0[:10000] + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } compressed := new(bytes.Buffer) w, err := NewWriter(compressed, level) if err != nil { b.Fatal(err) } for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } io.Copy(w, bytes.NewBuffer(buf0)) } w.Close() @@ -38,38 +65,29 @@ func benchmarkDecoder(b *testing.B, level, n int) { } } -func BenchmarkDecoderBestSpeed1K(b *testing.B) { - benchmarkDecoder(b, BestSpeed, 1e4) -} - -func BenchmarkDecoderBestSpeed10K(b *testing.B) { - benchmarkDecoder(b, BestSpeed, 1e5) -} - -func BenchmarkDecoderBestSpeed100K(b *testing.B) { - benchmarkDecoder(b, BestSpeed, 1e6) -} - -func BenchmarkDecoderDefaultCompression1K(b *testing.B) { - benchmarkDecoder(b, DefaultCompression, 1e4) -} - -func BenchmarkDecoderDefaultCompression10K(b *testing.B) { - benchmarkDecoder(b, DefaultCompression, 1e5) -} - -func BenchmarkDecoderDefaultCompression100K(b *testing.B) { - benchmarkDecoder(b, DefaultCompression, 1e6) -} - -func BenchmarkDecoderBestCompression1K(b *testing.B) { - benchmarkDecoder(b, BestCompression, 1e4) -} - -func BenchmarkDecoderBestCompression10K(b *testing.B) { - benchmarkDecoder(b, BestCompression, 1e5) -} +// These short names are so that gofmt doesn't break the BenchmarkXxx function +// bodies below over multiple lines. +const ( + speed = BestSpeed + default_ = DefaultCompression + compress = BestCompression +) -func BenchmarkDecoderBestCompression100K(b *testing.B) { - benchmarkDecoder(b, BestCompression, 1e6) -} +func BenchmarkDecodeDigitsSpeed1e4(b *testing.B) { benchmarkDecode(b, digits, speed, 1e4) } +func BenchmarkDecodeDigitsSpeed1e5(b *testing.B) { benchmarkDecode(b, digits, speed, 1e5) } +func BenchmarkDecodeDigitsSpeed1e6(b *testing.B) { benchmarkDecode(b, digits, speed, 1e6) } +func BenchmarkDecodeDigitsDefault1e4(b *testing.B) { benchmarkDecode(b, digits, default_, 1e4) } +func BenchmarkDecodeDigitsDefault1e5(b *testing.B) { benchmarkDecode(b, digits, default_, 1e5) } +func BenchmarkDecodeDigitsDefault1e6(b *testing.B) { benchmarkDecode(b, digits, default_, 1e6) } +func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) } +func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) } +func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) } +func BenchmarkDecodeTwainSpeed1e4(b *testing.B) { benchmarkDecode(b, twain, speed, 1e4) } +func BenchmarkDecodeTwainSpeed1e5(b *testing.B) { benchmarkDecode(b, twain, speed, 1e5) } +func BenchmarkDecodeTwainSpeed1e6(b *testing.B) { benchmarkDecode(b, twain, speed, 1e6) } +func BenchmarkDecodeTwainDefault1e4(b *testing.B) { benchmarkDecode(b, twain, default_, 1e4) } +func BenchmarkDecodeTwainDefault1e5(b *testing.B) { benchmarkDecode(b, twain, default_, 1e5) } +func BenchmarkDecodeTwainDefault1e6(b *testing.B) { benchmarkDecode(b, twain, default_, 1e6) } +func BenchmarkDecodeTwainCompress1e4(b *testing.B) { benchmarkDecode(b, twain, compress, 1e4) } +func BenchmarkDecodeTwainCompress1e5(b *testing.B) { benchmarkDecode(b, twain, compress, 1e5) } +func BenchmarkDecodeTwainCompress1e6(b *testing.B) { benchmarkDecode(b, twain, compress, 1e6) } diff --git a/src/pkg/compress/flate/writer_test.go b/src/pkg/compress/flate/writer_test.go index 52d9cda0fb..58431774e0 100644 --- a/src/pkg/compress/flate/writer_test.go +++ b/src/pkg/compress/flate/writer_test.go @@ -10,16 +10,21 @@ import ( "testing" ) -func benchmarkEncoder(b *testing.B, level, n int) { +func benchmarkEncoder(b *testing.B, testfile, level, n int) { b.StopTimer() b.SetBytes(int64(n)) - buf0, err := ioutil.ReadFile("../testdata/e.txt") + buf0, err := ioutil.ReadFile(testfiles[testfile]) if err != nil { b.Fatal(err) } - buf0 = buf0[:10000] + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } buf1 := make([]byte, n) for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } copy(buf1[i:], buf0) } buf0 = nil @@ -35,38 +40,21 @@ func benchmarkEncoder(b *testing.B, level, n int) { } } -func BenchmarkEncoderBestSpeed1K(b *testing.B) { - benchmarkEncoder(b, BestSpeed, 1e4) -} - -func BenchmarkEncoderBestSpeed10K(b *testing.B) { - benchmarkEncoder(b, BestSpeed, 1e5) -} - -func BenchmarkEncoderBestSpeed100K(b *testing.B) { - benchmarkEncoder(b, BestSpeed, 1e6) -} - -func BenchmarkEncoderDefaultCompression1K(b *testing.B) { - benchmarkEncoder(b, DefaultCompression, 1e4) -} - -func BenchmarkEncoderDefaultCompression10K(b *testing.B) { - benchmarkEncoder(b, DefaultCompression, 1e5) -} - -func BenchmarkEncoderDefaultCompression100K(b *testing.B) { - benchmarkEncoder(b, DefaultCompression, 1e6) -} - -func BenchmarkEncoderBestCompression1K(b *testing.B) { - benchmarkEncoder(b, BestCompression, 1e4) -} - -func BenchmarkEncoderBestCompression10K(b *testing.B) { - benchmarkEncoder(b, BestCompression, 1e5) -} - -func BenchmarkEncoderBestCompression100K(b *testing.B) { - benchmarkEncoder(b, BestCompression, 1e6) -} +func BenchmarkEncodeDigitsSpeed1e4(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e4) } +func BenchmarkEncodeDigitsSpeed1e5(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e5) } +func BenchmarkEncodeDigitsSpeed1e6(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e6) } +func BenchmarkEncodeDigitsDefault1e4(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e4) } +func BenchmarkEncodeDigitsDefault1e5(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e5) } +func BenchmarkEncodeDigitsDefault1e6(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e6) } +func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) } +func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) } +func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) } +func BenchmarkEncodeTwainSpeed1e4(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e4) } +func BenchmarkEncodeTwainSpeed1e5(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e5) } +func BenchmarkEncodeTwainSpeed1e6(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e6) } +func BenchmarkEncodeTwainDefault1e4(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e4) } +func BenchmarkEncodeTwainDefault1e5(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e5) } +func BenchmarkEncodeTwainDefault1e6(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e6) } +func BenchmarkEncodeTwainCompress1e4(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e4) } +func BenchmarkEncodeTwainCompress1e5(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e5) } +func BenchmarkEncodeTwainCompress1e6(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e6) } -- 2.48.1