From 38458ce3fea94ed9c89eb2dc78be9bc79b2b0c66 Mon Sep 17 00:00:00 2001 From: Shenghou Ma Date: Sun, 18 Nov 2012 02:23:34 +0800 Subject: [PATCH] crypto/md5: speed up aligned writes and test/bench unaligned writes Write() can safely use uint32 loads when input is aligned. Also add test and benchmarks for unaligned writes. Benchmark result obtained by Dave Cheney on ARMv5TE @ 1.2GHz: benchmark old ns/op new ns/op delta BenchmarkHash8Bytes 4104 3417 -16.74% BenchmarkHash1K 22061 11208 -49.20% BenchmarkHash8K 146630 65148 -55.57% BenchmarkHash8BytesUnaligned 4128 3436 -16.76% BenchmarkHash1KUnaligned 22054 21473 -2.63% BenchmarkHash8KUnaligned 146658 146909 +0.17% benchmark old MB/s new MB/s speedup BenchmarkHash8Bytes 1.95 2.34 1.20x BenchmarkHash1K 46.42 91.36 1.97x BenchmarkHash8K 55.87 125.74 2.25x BenchmarkHash8BytesUnaligned 1.94 2.33 1.20x BenchmarkHash1KUnaligned 46.43 47.69 1.03x BenchmarkHash8KUnaligned 55.86 55.76 1.00x R=golang-dev, dave, bradfitz CC=golang-dev https://golang.org/cl/6782072 --- src/pkg/crypto/md5/gen.go | 2 ++ src/pkg/crypto/md5/md5_test.go | 42 +++++++++++++++++++++++++++------- src/pkg/crypto/md5/md5block.go | 2 ++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/pkg/crypto/md5/gen.go b/src/pkg/crypto/md5/gen.go index 1a9c4ab33d..966bdae267 100644 --- a/src/pkg/crypto/md5/gen.go +++ b/src/pkg/crypto/md5/gen.go @@ -203,6 +203,8 @@ func block(dig *digest, p []byte) { // less code and run 1.3x faster if we take advantage of that. // My apologies. X = (*[16]uint32)(unsafe.Pointer(&p[0])) + } else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + X = (*[16]uint32)(unsafe.Pointer(&p[0])) } else { X = &xbuf j := 0 diff --git a/src/pkg/crypto/md5/md5_test.go b/src/pkg/crypto/md5/md5_test.go index c810251ff2..cac39ad054 100644 --- a/src/pkg/crypto/md5/md5_test.go +++ b/src/pkg/crypto/md5/md5_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "testing" + "unsafe" ) type md5Test struct { @@ -54,13 +55,19 @@ func TestGolden(t *testing.T) { for i := 0; i < len(golden); i++ { g := golden[i] c := md5.New() - for j := 0; j < 3; j++ { + buf := make([]byte, len(g.in)+4) + for j := 0; j < 3+4; j++ { if j < 2 { io.WriteString(c, g.in) - } else { + } else if j == 2 { io.WriteString(c, g.in[0:len(g.in)/2]) c.Sum(nil) io.WriteString(c, g.in[len(g.in)/2:]) + } else if j > 2 { + // test unaligned write + buf = buf[1:] + copy(buf, g.in) + c.Write(buf[:len(g.in)]) } s := fmt.Sprintf("%x", c.Sum(nil)) if s != g.out { @@ -80,11 +87,18 @@ func ExampleNew() { } var bench = md5.New() -var buf = make([]byte, 8192) +var buf = make([]byte, 8192+1) +var sum = make([]byte, bench.Size()) -func benchmarkSize(b *testing.B, size int) { +func benchmarkSize(b *testing.B, size int, unaligned bool) { b.SetBytes(int64(size)) - sum := make([]byte, bench.Size()) + buf := buf + if unaligned { + if uintptr(unsafe.Pointer(&buf[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + buf = buf[1:] + } + } + b.ResetTimer() for i := 0; i < b.N; i++ { bench.Reset() bench.Write(buf[:size]) @@ -93,13 +107,25 @@ func benchmarkSize(b *testing.B, size int) { } func BenchmarkHash8Bytes(b *testing.B) { - benchmarkSize(b, 8) + benchmarkSize(b, 8, false) } func BenchmarkHash1K(b *testing.B) { - benchmarkSize(b, 1024) + benchmarkSize(b, 1024, false) } func BenchmarkHash8K(b *testing.B) { - benchmarkSize(b, 8192) + benchmarkSize(b, 8192, false) +} + +func BenchmarkHash8BytesUnaligned(b *testing.B) { + benchmarkSize(b, 8, true) +} + +func BenchmarkHash1KUnaligned(b *testing.B) { + benchmarkSize(b, 1024, true) +} + +func BenchmarkHash8KUnaligned(b *testing.B) { + benchmarkSize(b, 8192, true) } diff --git a/src/pkg/crypto/md5/md5block.go b/src/pkg/crypto/md5/md5block.go index 5dbdf5606b..59f8f6f5af 100644 --- a/src/pkg/crypto/md5/md5block.go +++ b/src/pkg/crypto/md5/md5block.go @@ -22,6 +22,8 @@ func block(dig *digest, p []byte) { // less code and run 1.3x faster if we take advantage of that. // My apologies. X = (*[16]uint32)(unsafe.Pointer(&p[0])) + } else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + X = (*[16]uint32)(unsafe.Pointer(&p[0])) } else { X = &xbuf j := 0 -- 2.50.0