Cypherpunks repositories - gostls13.git/commitdiff
crypto: limit md5 or sha256 blocks processed at once in assembly
author     Radu Berinde <radu@cockroachlabs.com>
           Tue, 13 May 2025 00:45:25 +0000 (00:45 +0000)
committer  Roland Shoemaker <roland@golang.org>
           Tue, 13 May 2025 15:09:51 +0000 (08:09 -0700)
This change limits the amount of data that can be hashed at once: the
assembly routines are not preemptible, so hashing a large buffer in a
single call can cause large latency outliers when the hash is part of a
larger system.
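
To make the pattern concrete, here is a minimal, self-contained sketch
(not part of the change; writeBlocks and blockCalls are illustrative
stand-ins for the real Write methods and the assembly block() routine,
while the constants mirror the ones added in the diff below):

package main

import "fmt"

const blockSize = 64                       // SHA-256/MD5 block size in bytes
const maxAsmIters = 1024                   // cap on blocks per assembly call
const maxAsmSize = blockSize * maxAsmIters // 64 KiB

// blockCalls stands in for the non-preemptible assembly routine; it only
// records how many bytes each call would process.
func blockCalls(calls *[]int, p []byte) { *calls = append(*calls, len(p)) }

// writeBlocks mirrors the chunking pattern added to the Write methods.
func writeBlocks(calls *[]int, p []byte) []byte {
	if len(p) >= blockSize {
		n := len(p) &^ (blockSize - 1) // round down to whole blocks
		for n > maxAsmSize {
			blockCalls(calls, p[:maxAsmSize]) // at most 64 KiB per call
			p = p[maxAsmSize:]
			n -= maxAsmSize
		}
		blockCalls(calls, p[:n])
		p = p[n:]
	}
	return p // unprocessed tail; the real Write buffers it
}

func main() {
	var calls []int
	rest := writeBlocks(&calls, make([]byte, 1<<20+10)) // 1 MiB + 10 bytes
	fmt.Printf("%d calls of %d bytes, %d bytes left over\n",
		len(calls), calls[0], len(rest))
	// Prints: 16 calls of 65536 bytes, 10 bytes left over
}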

Benchmarks for sha256 (on an arm64 M1):

name                  old speed      new speed      delta
Hash8Bytes/New-10      178MB/s ± 0%   178MB/s ± 0%  +0.16%  (p=0.002 n=9+8)
Hash8Bytes/Sum224-10   154MB/s ± 0%   154MB/s ± 0%    ~     (p=0.287 n=9+10)
Hash8Bytes/Sum256-10   156MB/s ± 0%   157MB/s ± 0%  +0.13%  (p=0.004 n=9+8)
Hash1K/New-10         2.28GB/s ± 0%  2.28GB/s ± 0%    ~     (p=0.968 n=10+9)
Hash1K/Sum224-10      2.20GB/s ± 0%  2.21GB/s ± 0%  +0.30%  (p=0.001 n=9+9)
Hash1K/Sum256-10      2.21GB/s ± 0%  2.21GB/s ± 0%  +0.26%  (p=0.000 n=9+8)
Hash8K/New-10         2.37GB/s ± 2%  2.40GB/s ± 0%    ~     (p=0.289 n=10+10)
Hash8K/Sum224-10      2.39GB/s ± 0%  2.39GB/s ± 0%    ~     (p=0.983 n=8+9)
Hash8K/Sum256-10      2.39GB/s ± 0%  2.39GB/s ± 0%    ~     (p=0.905 n=9+10)
Hash256K/New-10       2.42GB/s ± 0%  2.42GB/s ± 0%    ~     (p=0.250 n=9+10)
Hash256K/Sum224-10    2.42GB/s ± 0%  2.42GB/s ± 0%    ~     (p=0.093 n=8+9)
Hash256K/Sum256-10    2.42GB/s ± 0%  2.42GB/s ± 0%    ~     (p=0.211 n=10+9)
Hash1M/New-10         2.42GB/s ± 0%  2.42GB/s ± 0%    ~     (p=0.963 n=8+9)
Hash1M/Sum224-10      2.42GB/s ± 0%  2.42GB/s ± 0%    ~     (p=0.173 n=10+8)
Hash1M/Sum256-10      2.42GB/s ± 0%  2.42GB/s ± 0%    ~     (p=0.743 n=9+8)

Note that `Hash8K` shows that an 8K block size is already sufficient to
achieve peak bandwidth, so the 64KB maxAsmSize should be plenty.
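
As a back-of-the-envelope check (an illustrative sketch, not part of the
change), combining the 64KB cap with the ~2.42GB/s peak throughput
measured above bounds each non-preemptible assembly call to roughly 27µs:

package main

import "fmt"

func main() {
	const blockSize = 64
	const maxAsmIters = 1024
	const maxAsmSize = blockSize * maxAsmIters // 64 KiB per assembly call

	// Peak SHA-256 throughput from the Hash1M benchmark above (arm64 M1).
	const peakBytesPerSec = 2.42e9

	worstCase := float64(maxAsmSize) / peakBytesPerSec
	fmt.Printf("max %d bytes per call => ~%.0f µs non-preemptible\n",
		maxAsmSize, worstCase*1e6)
	// Prints: max 65536 bytes per call => ~27 µs non-preemptible
}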

Benchmarks for md5:

name       old speed     new speed     delta
Hash1M-10  669MB/s ± 0%  669MB/s ± 0%   ~     (p=0.965 n=8+10)
Hash8M-10  667MB/s ± 0%  666MB/s ± 0%   ~     (p=0.356 n=10+9)

Fixes #64417

Change-Id: If7f5e7587b33c65148f49859c9d46ae6f6948db4
GitHub-Last-Rev: 2f83f4255412b533469e953db6c1ef16fa3eb7c2
GitHub-Pull-Request: golang/go#73638
Reviewed-on: https://go-review.googlesource.com/c/go/+/671098
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Ian Stapleton Cordasco <graffatcolmingov@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/crypto/internal/fips140/sha256/sha256.go
src/crypto/md5/md5.go
src/crypto/md5/md5_test.go
src/crypto/sha256/sha256_test.go

diff --git a/src/crypto/internal/fips140/sha256/sha256.go b/src/crypto/internal/fips140/sha256/sha256.go
index e8c7c25f06294f66c2b531d49bc3043481b1202a..bc157f9adbfc346101b155e97691e37fc3c68751 100644
@@ -21,6 +21,11 @@ const size224 = 28
 // The block size of SHA-256 and SHA-224 in bytes.
 const blockSize = 64
 
+// The maximum number of bytes that can be passed to block(). The limit exists
+// because implementations that rely on assembly routines are not preemptible.
+const maxAsmIters = 1024
+const maxAsmSize = blockSize * maxAsmIters // 64KiB
+
 const (
        chunk     = 64
        init0     = 0x6A09E667
@@ -172,6 +177,11 @@ func (d *Digest) Write(p []byte) (nn int, err error) {
        }
        if len(p) >= chunk {
                n := len(p) &^ (chunk - 1)
+               for n > maxAsmSize {
+                       block(d, p[:maxAsmSize])
+                       p = p[maxAsmSize:]
+                       n -= maxAsmSize
+               }
                block(d, p[:n])
                p = p[n:]
        }
diff --git a/src/crypto/md5/md5.go b/src/crypto/md5/md5.go
index a0384e175f31bdb6215d610d8edfb954b46397a3..dc586fb21787c93ce07a5341c4bbf61962a540c4 100644
@@ -28,6 +28,11 @@ const Size = 16
 // The blocksize of MD5 in bytes.
 const BlockSize = 64
 
+// The maximum number of bytes that can be passed to block(). The limit exists
+// because implementations that rely on assembly routines are not preemptible.
+const maxAsmIters = 1024
+const maxAsmSize = BlockSize * maxAsmIters // 64KiB
+
 const (
        init0 = 0x67452301
        init1 = 0xEFCDAB89
@@ -138,6 +143,11 @@ func (d *digest) Write(p []byte) (nn int, err error) {
        if len(p) >= BlockSize {
                n := len(p) &^ (BlockSize - 1)
                if haveAsm {
+                       for n > maxAsmSize {
+                               block(d, p[:maxAsmSize])
+                               p = p[maxAsmSize:]
+                               n -= maxAsmSize
+                       }
                        block(d, p[:n])
                } else {
                        blockGeneric(d, p[:n])
diff --git a/src/crypto/md5/md5_test.go b/src/crypto/md5/md5_test.go
index 2353ea85b54bafed7ad019caa234e37bd4afabaf..c0bb15f05bac7c8e26f21e6793f2c76b108fe9a2 100644
@@ -133,10 +133,11 @@ func TestGoldenMarshal(t *testing.T) {
 
 func TestLarge(t *testing.T) {
        const N = 10000
+       const offsets = 4
        ok := "2bb571599a4180e1d542f76904adc3df" // md5sum of "0123456789" * 1000
-       block := make([]byte, 10004)
+       block := make([]byte, N+offsets)
        c := New()
-       for offset := 0; offset < 4; offset++ {
+       for offset := 0; offset < offsets; offset++ {
                for i := 0; i < N; i++ {
                        block[offset+i] = '0' + byte(i%10)
                }
@@ -155,6 +156,31 @@ func TestLarge(t *testing.T) {
        }
 }
 
+func TestExtraLarge(t *testing.T) {
+       const N = 100000
+       const offsets = 4
+       ok := "13572e9e296cff52b79c52148313c3a5" // md5sum of "0123456789" * 10000
+       block := make([]byte, N+offsets)
+       c := New()
+       for offset := 0; offset < offsets; offset++ {
+               for i := 0; i < N; i++ {
+                       block[offset+i] = '0' + byte(i%10)
+               }
+               for blockSize := 10; blockSize <= N; blockSize *= 10 {
+                       blocks := N / blockSize
+                       b := block[offset : offset+blockSize]
+                       c.Reset()
+                       for i := 0; i < blocks; i++ {
+                               c.Write(b)
+                       }
+                       s := fmt.Sprintf("%x", c.Sum(nil))
+                       if s != ok {
+                               t.Fatalf("md5 TestExtraLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok)
+                       }
+               }
+       }
+}
+
 // Tests that blockGeneric (pure Go) and block (in assembly for amd64, 386, arm) match.
 func TestBlockGeneric(t *testing.T) {
        gen, asm := New().(*digest), New().(*digest)
diff --git a/src/crypto/sha256/sha256_test.go b/src/crypto/sha256/sha256_test.go
index b3b4e77f578e603663b76bcd8e962450a396ec8c..38a7f25afb3fd1c4c3064e5939dbc681771aa239 100644
@@ -200,6 +200,56 @@ func testGoldenMarshal(t *testing.T) {
        }
 }
 
+func TestLarge(t *testing.T) {
+       const N = 10000
+       const offsets = 4
+       ok := "4c207598af7a20db0e3334dd044399a40e467cb81b37f7ba05a4f76dcbd8fd59" // sha256sum of "0123456789" * 1000
+       block := make([]byte, N+offsets)
+       c := New()
+       for offset := 0; offset < offsets; offset++ {
+               for i := 0; i < N; i++ {
+                       block[offset+i] = '0' + byte(i%10)
+               }
+               for blockSize := 10; blockSize <= N; blockSize *= 10 {
+                       blocks := N / blockSize
+                       b := block[offset : offset+blockSize]
+                       c.Reset()
+                       for i := 0; i < blocks; i++ {
+                               c.Write(b)
+                       }
+                       s := fmt.Sprintf("%x", c.Sum(nil))
+                       if s != ok {
+                               t.Fatalf("sha256 TestLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok)
+                       }
+               }
+       }
+}
+
+func TestExtraLarge(t *testing.T) {
+       const N = 100000
+       const offsets = 4
+       ok := "aca9e593cc629cbaa94cd5a07dc029424aad93e5129e5d11f8dcd2f139c16cc0" // sha256sum of "0123456789" * 10000
+       block := make([]byte, N+offsets)
+       c := New()
+       for offset := 0; offset < offsets; offset++ {
+               for i := 0; i < N; i++ {
+                       block[offset+i] = '0' + byte(i%10)
+               }
+               for blockSize := 10; blockSize <= N; blockSize *= 10 {
+                       blocks := N / blockSize
+                       b := block[offset : offset+blockSize]
+                       c.Reset()
+                       for i := 0; i < blocks; i++ {
+                               c.Write(b)
+                       }
+                       s := fmt.Sprintf("%x", c.Sum(nil))
+                       if s != ok {
+                               t.Fatalf("sha256 TestExtraLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok)
+                       }
+               }
+       }
+}
+
 func TestMarshalTypeMismatch(t *testing.T) {
        h1 := New()
        h2 := New224()
@@ -366,16 +416,16 @@ func TestExtraMethods(t *testing.T) {
 }
 
 var bench = New()
-var buf = make([]byte, 8192)
 
 func benchmarkSize(b *testing.B, size int) {
+       buf := make([]byte, size)
        sum := make([]byte, bench.Size())
        b.Run("New", func(b *testing.B) {
                b.ReportAllocs()
                b.SetBytes(int64(size))
                for i := 0; i < b.N; i++ {
                        bench.Reset()
-                       bench.Write(buf[:size])
+                       bench.Write(buf)
                        bench.Sum(sum[:0])
                }
        })
@@ -383,14 +433,14 @@ func benchmarkSize(b *testing.B, size int) {
                b.ReportAllocs()
                b.SetBytes(int64(size))
                for i := 0; i < b.N; i++ {
-                       Sum224(buf[:size])
+                       Sum224(buf)
                }
        })
        b.Run("Sum256", func(b *testing.B) {
                b.ReportAllocs()
                b.SetBytes(int64(size))
                for i := 0; i < b.N; i++ {
-                       Sum256(buf[:size])
+                       Sum256(buf)
                }
        })
 }
@@ -406,3 +456,11 @@ func BenchmarkHash1K(b *testing.B) {
 func BenchmarkHash8K(b *testing.B) {
        benchmarkSize(b, 8192)
 }
+
+func BenchmarkHash256K(b *testing.B) {
+       benchmarkSize(b, 256*1024)
+}
+
+func BenchmarkHash1M(b *testing.B) {
+       benchmarkSize(b, 1024*1024)
+}