From: Radu Berinde Date: Tue, 13 May 2025 00:45:25 +0000 (+0000) Subject: crypto: limit md5 or sha256 blocks processed at once in assembly X-Git-Tag: go1.25rc1~289 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=698f86139bf72bcf7cbf08accc1c34394cb57acb;p=gostls13.git crypto: limit md5 or sha256 blocks processed at once in assembly This change limits the amount of data that can be hashed at once - the assembly routines are not preemptible and can result in large latency outliers when part of a larger system. Benchmarks for sha256 (on an arm64 M1): name old speed new speed delta Hash8Bytes/New-10 178MB/s ± 0% 178MB/s ± 0% +0.16% (p=0.002 n=9+8) Hash8Bytes/Sum224-10 154MB/s ± 0% 154MB/s ± 0% ~ (p=0.287 n=9+10) Hash8Bytes/Sum256-10 156MB/s ± 0% 157MB/s ± 0% +0.13% (p=0.004 n=9+8) Hash1K/New-10 2.28GB/s ± 0% 2.28GB/s ± 0% ~ (p=0.968 n=10+9) Hash1K/Sum224-10 2.20GB/s ± 0% 2.21GB/s ± 0% +0.30% (p=0.001 n=9+9) Hash1K/Sum256-10 2.21GB/s ± 0% 2.21GB/s ± 0% +0.26% (p=0.000 n=9+8) Hash8K/New-10 2.37GB/s ± 2% 2.40GB/s ± 0% ~ (p=0.289 n=10+10) Hash8K/Sum224-10 2.39GB/s ± 0% 2.39GB/s ± 0% ~ (p=0.983 n=8+9) Hash8K/Sum256-10 2.39GB/s ± 0% 2.39GB/s ± 0% ~ (p=0.905 n=9+10) Hash256K/New-10 2.42GB/s ± 0% 2.42GB/s ± 0% ~ (p=0.250 n=9+10) Hash256K/Sum224-10 2.42GB/s ± 0% 2.42GB/s ± 0% ~ (p=0.093 n=8+9) Hash256K/Sum256-10 2.42GB/s ± 0% 2.42GB/s ± 0% ~ (p=0.211 n=10+9) Hash1M/New-10 2.42GB/s ± 0% 2.42GB/s ± 0% ~ (p=0.963 n=8+9) Hash1M/Sum224-10 2.42GB/s ± 0% 2.42GB/s ± 0% ~ (p=0.173 n=10+8) Hash1M/Sum256-10 2.42GB/s ± 0% 2.42GB/s ± 0% ~ (p=0.743 n=9+8) Note that `Hash8K` shows that a 8K block size is sufficient to achieve peak bandwidth, so the 64KB maxAsmSize should be plenty. Benchmarks for md5: name old speed new speed delta Hash1M-10 669MB/s ± 0% 669MB/s ± 0% ~ (p=0.965 n=8+10) Hash8M-10 667MB/s ± 0% 666MB/s ± 0% ~ (p=0.356 n=10+9) Fixes #64417 Change-Id: If7f5e7587b33c65148f49859c9d46ae6f6948db4 GitHub-Last-Rev: 2f83f4255412b533469e953db6c1ef16fa3eb7c2 GitHub-Pull-Request: golang/go#73638 Reviewed-on: https://go-review.googlesource.com/c/go/+/671098 Reviewed-by: Jorropo LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Knyszek Reviewed-by: Filippo Valsorda Reviewed-by: Ian Stapleton Cordasco Reviewed-by: Cherry Mui --- diff --git a/src/crypto/internal/fips140/sha256/sha256.go b/src/crypto/internal/fips140/sha256/sha256.go index e8c7c25f06..bc157f9adb 100644 --- a/src/crypto/internal/fips140/sha256/sha256.go +++ b/src/crypto/internal/fips140/sha256/sha256.go @@ -21,6 +21,11 @@ const size224 = 28 // The block size of SHA-256 and SHA-224 in bytes. const blockSize = 64 +// The maximum number of bytes that can be passed to block(). The limit exists +// because implementations that rely on assembly routines are not preemptible. +const maxAsmIters = 1024 +const maxAsmSize = blockSize * maxAsmIters // 64KiB + const ( chunk = 64 init0 = 0x6A09E667 @@ -172,6 +177,11 @@ func (d *Digest) Write(p []byte) (nn int, err error) { } if len(p) >= chunk { n := len(p) &^ (chunk - 1) + for n > maxAsmSize { + block(d, p[:maxAsmSize]) + p = p[maxAsmSize:] + n -= maxAsmSize + } block(d, p[:n]) p = p[n:] } diff --git a/src/crypto/md5/md5.go b/src/crypto/md5/md5.go index a0384e175f..dc586fb217 100644 --- a/src/crypto/md5/md5.go +++ b/src/crypto/md5/md5.go @@ -28,6 +28,11 @@ const Size = 16 // The blocksize of MD5 in bytes. const BlockSize = 64 +// The maximum number of bytes that can be passed to block(). The limit exists +// because implementations that rely on assembly routines are not preemptible. +const maxAsmIters = 1024 +const maxAsmSize = BlockSize * maxAsmIters // 64KiB + const ( init0 = 0x67452301 init1 = 0xEFCDAB89 @@ -138,6 +143,11 @@ func (d *digest) Write(p []byte) (nn int, err error) { if len(p) >= BlockSize { n := len(p) &^ (BlockSize - 1) if haveAsm { + for n > maxAsmSize { + block(d, p[:maxAsmSize]) + p = p[maxAsmSize:] + n -= maxAsmSize + } block(d, p[:n]) } else { blockGeneric(d, p[:n]) diff --git a/src/crypto/md5/md5_test.go b/src/crypto/md5/md5_test.go index 2353ea85b5..c0bb15f05b 100644 --- a/src/crypto/md5/md5_test.go +++ b/src/crypto/md5/md5_test.go @@ -133,10 +133,11 @@ func TestGoldenMarshal(t *testing.T) { func TestLarge(t *testing.T) { const N = 10000 + const offsets = 4 ok := "2bb571599a4180e1d542f76904adc3df" // md5sum of "0123456789" * 1000 - block := make([]byte, 10004) + block := make([]byte, N+offsets) c := New() - for offset := 0; offset < 4; offset++ { + for offset := 0; offset < offsets; offset++ { for i := 0; i < N; i++ { block[offset+i] = '0' + byte(i%10) } @@ -155,6 +156,31 @@ func TestLarge(t *testing.T) { } } +func TestExtraLarge(t *testing.T) { + const N = 100000 + const offsets = 4 + ok := "13572e9e296cff52b79c52148313c3a5" // md5sum of "0123456789" * 10000 + block := make([]byte, N+offsets) + c := New() + for offset := 0; offset < offsets; offset++ { + for i := 0; i < N; i++ { + block[offset+i] = '0' + byte(i%10) + } + for blockSize := 10; blockSize <= N; blockSize *= 10 { + blocks := N / blockSize + b := block[offset : offset+blockSize] + c.Reset() + for i := 0; i < blocks; i++ { + c.Write(b) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != ok { + t.Fatalf("md5 TestExtraLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok) + } + } + } +} + // Tests that blockGeneric (pure Go) and block (in assembly for amd64, 386, arm) match. func TestBlockGeneric(t *testing.T) { gen, asm := New().(*digest), New().(*digest) diff --git a/src/crypto/sha256/sha256_test.go b/src/crypto/sha256/sha256_test.go index b3b4e77f57..38a7f25afb 100644 --- a/src/crypto/sha256/sha256_test.go +++ b/src/crypto/sha256/sha256_test.go @@ -200,6 +200,56 @@ func testGoldenMarshal(t *testing.T) { } } +func TestLarge(t *testing.T) { + const N = 10000 + const offsets = 4 + ok := "4c207598af7a20db0e3334dd044399a40e467cb81b37f7ba05a4f76dcbd8fd59" // sha256sum of "0123456789" * 1000 + block := make([]byte, N+offsets) + c := New() + for offset := 0; offset < offsets; offset++ { + for i := 0; i < N; i++ { + block[offset+i] = '0' + byte(i%10) + } + for blockSize := 10; blockSize <= N; blockSize *= 10 { + blocks := N / blockSize + b := block[offset : offset+blockSize] + c.Reset() + for i := 0; i < blocks; i++ { + c.Write(b) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != ok { + t.Fatalf("sha256 TestLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok) + } + } + } +} + +func TestExtraLarge(t *testing.T) { + const N = 100000 + const offsets = 4 + ok := "aca9e593cc629cbaa94cd5a07dc029424aad93e5129e5d11f8dcd2f139c16cc0" // sha256sum of "0123456789" * 10000 + block := make([]byte, N+offsets) + c := New() + for offset := 0; offset < offsets; offset++ { + for i := 0; i < N; i++ { + block[offset+i] = '0' + byte(i%10) + } + for blockSize := 10; blockSize <= N; blockSize *= 10 { + blocks := N / blockSize + b := block[offset : offset+blockSize] + c.Reset() + for i := 0; i < blocks; i++ { + c.Write(b) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != ok { + t.Fatalf("sha256 TestExtraLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok) + } + } + } +} + func TestMarshalTypeMismatch(t *testing.T) { h1 := New() h2 := New224() @@ -366,16 +416,16 @@ func TestExtraMethods(t *testing.T) { } var bench = New() -var buf = make([]byte, 8192) func benchmarkSize(b *testing.B, size int) { + buf := make([]byte, size) sum := make([]byte, bench.Size()) b.Run("New", func(b *testing.B) { b.ReportAllocs() b.SetBytes(int64(size)) for i := 0; i < b.N; i++ { bench.Reset() - bench.Write(buf[:size]) + bench.Write(buf) bench.Sum(sum[:0]) } }) @@ -383,14 +433,14 @@ func benchmarkSize(b *testing.B, size int) { b.ReportAllocs() b.SetBytes(int64(size)) for i := 0; i < b.N; i++ { - Sum224(buf[:size]) + Sum224(buf) } }) b.Run("Sum256", func(b *testing.B) { b.ReportAllocs() b.SetBytes(int64(size)) for i := 0; i < b.N; i++ { - Sum256(buf[:size]) + Sum256(buf) } }) } @@ -406,3 +456,11 @@ func BenchmarkHash1K(b *testing.B) { func BenchmarkHash8K(b *testing.B) { benchmarkSize(b, 8192) } + +func BenchmarkHash256K(b *testing.B) { + benchmarkSize(b, 256*1024) +} + +func BenchmarkHash1M(b *testing.B) { + benchmarkSize(b, 1024*1024) +}