Use the built-in copy rather than a hand-rolled loop when moving a
partial input block to the scratch area. This results in a reasonable
performance gain when partial blocks are written.
Benchmarks on Intel(R) Xeon(R) CPU X5650 @ 2.67GHz with Go amd64:
benchmark old MB/s new MB/s speedup
SHA1 BenchmarkHash8Bytes 18.37 22.80 1.24x
SHA256 BenchmarkHash8Bytes 11.86 13.78 1.16x
SHA512 BenchmarkHash8Bytes 4.51 5.24 1.16x
benchmark old ns/op new ns/op delta
SHA1 BenchmarkHash8Bytes 435 350 -19.54%
SHA256 BenchmarkHash8Bytes 674 580 -13.95%
SHA512 BenchmarkHash8Bytes 1772 1526 -13.88%
R=agl, dave, bradfitz
CC=golang-codereviews
https://golang.org/cl/35840044
nn = len(p)
d.len += uint64(nn)
if d.nx > 0 {
- n := len(p)
- if n > chunk-d.nx {
- n = chunk - d.nx
- }
- for i := 0; i < n; i++ {
- d.x[d.nx+i] = p[i]
- }
+ n := copy(d.x[d.nx:], p)
d.nx += n
if d.nx == chunk {
- block(d, d.x[0:])
+ block(d, d.x[:])
d.nx = 0
}
p = p[n:]
nn = len(p)
d.len += uint64(nn)
if d.nx > 0 {
- n := len(p)
- if n > chunk-d.nx {
- n = chunk - d.nx
- }
- for i := 0; i < n; i++ {
- d.x[d.nx+i] = p[i]
- }
+ n := copy(d.x[d.nx:], p)
d.nx += n
if d.nx == chunk {
- block(d, d.x[0:])
+ block(d, d.x[:])
d.nx = 0
}
p = p[n:]
nn = len(p)
d.len += uint64(nn)
if d.nx > 0 {
- n := len(p)
- if n > chunk-d.nx {
- n = chunk - d.nx
- }
- for i := 0; i < n; i++ {
- d.x[d.nx+i] = p[i]
- }
+ n := copy(d.x[d.nx:], p)
d.nx += n
if d.nx == chunk {
- block(d, d.x[0:])
+ block(d, d.x[:])
d.nx = 0
}
p = p[n:]