]> Cypherpunks repositories - gostls13.git/commitdiff
crypto/aes: speed up AES by reducing allocations
authorMarten Seemann <martenseemann@gmail.com>
Sun, 4 Feb 2024 05:59:59 +0000 (12:59 +0700)
committerDamien Neil <dneil@google.com>
Mon, 25 Mar 2024 14:58:19 +0000 (14:58 +0000)
By embedding the arrays into aesCipher directly, we can save a few allocations
when creating a new AES cipher. This also avoids a lot of pointer chasing when
encrypting and decrypting, leading to 3-4% faster performance.

Fixes #65507.

name                     old time/op    new time/op    delta
Encrypt/AES-128-16         4.70ns ± 3%    4.46ns ± 2%   -5.08%  (p=0.000 n=10+10)
Encrypt/AES-192-16         4.90ns ± 3%    4.71ns ± 2%   -3.98%  (p=0.000 n=10+10)
Encrypt/AES-256-16         5.18ns ± 3%    4.91ns ± 1%   -5.15%  (p=0.000 n=10+10)
Decrypt/AES-128-16         4.51ns ± 2%    4.37ns ± 1%   -3.01%  (p=0.000 n=10+10)
Decrypt/AES-192-16         4.77ns ± 2%    4.63ns ± 2%   -3.05%  (p=0.000 n=10+10)
Decrypt/AES-256-16         5.10ns ± 2%    4.93ns ± 3%   -3.27%  (p=0.000 n=10+10)
Expand/AES-128-16          52.5ns ± 4%    55.9ns ± 3%   +6.58%  (p=0.000 n=10+10)
Expand/AES-192-16          45.6ns ± 5%    48.1ns ± 2%   +5.56%  (p=0.000 n=10+10)
Expand/AES-256-16          69.2ns ± 5%    71.7ns ± 3%   +3.61%  (p=0.006 n=9+10)
CreateCipher/AES-128-16     130ns ± 3%      84ns ± 3%  -35.13%  (p=0.000 n=10+10)
CreateCipher/AES-192-16     128ns ± 2%      78ns ± 3%  -38.82%  (p=0.000 n=10+10)
CreateCipher/AES-256-16     156ns ± 3%     105ns ± 4%  -32.33%  (p=0.000 n=10+10)

name                     old speed      new speed      delta
Encrypt/AES-128-16       3.40GB/s ± 2%  3.59GB/s ± 2%   +5.35%  (p=0.000 n=10+10)
Encrypt/AES-192-16       3.27GB/s ± 3%  3.40GB/s ± 2%   +4.13%  (p=0.000 n=10+10)
Encrypt/AES-256-16       3.09GB/s ± 2%  3.26GB/s ± 1%   +5.42%  (p=0.000 n=10+10)
Decrypt/AES-128-16       3.55GB/s ± 2%  3.66GB/s ± 1%   +3.09%  (p=0.000 n=10+10)
Decrypt/AES-192-16       3.35GB/s ± 2%  3.46GB/s ± 2%   +3.14%  (p=0.000 n=10+10)
Decrypt/AES-256-16       3.14GB/s ± 2%  3.24GB/s ± 3%   +3.39%  (p=0.000 n=10+10)

name                     old alloc/op   new alloc/op   delta
Encrypt/AES-128-16          0.00B          0.00B          ~     (all equal)
Encrypt/AES-192-16          0.00B          0.00B          ~     (all equal)
Encrypt/AES-256-16          0.00B          0.00B          ~     (all equal)
Decrypt/AES-128-16          0.00B          0.00B          ~     (all equal)
Decrypt/AES-192-16          0.00B          0.00B          ~     (all equal)
Decrypt/AES-256-16          0.00B          0.00B          ~     (all equal)
Expand/AES-128-16           0.00B          0.00B          ~     (all equal)
Expand/AES-192-16           0.00B          0.00B          ~     (all equal)
Expand/AES-256-16           0.00B          0.00B          ~     (all equal)
CreateCipher/AES-128-16      448B ± 0%      512B ± 0%  +14.29%  (p=0.000 n=10+10)
CreateCipher/AES-192-16      512B ± 0%      512B ± 0%     ~     (all equal)
CreateCipher/AES-256-16      576B ± 0%      512B ± 0%  -11.11%  (p=0.000 n=10+10)

name                     old allocs/op  new allocs/op  delta
Encrypt/AES-128-16           0.00           0.00          ~     (all equal)
Encrypt/AES-192-16           0.00           0.00          ~     (all equal)
Encrypt/AES-256-16           0.00           0.00          ~     (all equal)
Decrypt/AES-128-16           0.00           0.00          ~     (all equal)
Decrypt/AES-192-16           0.00           0.00          ~     (all equal)
Decrypt/AES-256-16           0.00           0.00          ~     (all equal)
Expand/AES-128-16            0.00           0.00          ~     (all equal)
Expand/AES-192-16            0.00           0.00          ~     (all equal)
Expand/AES-256-16            0.00           0.00          ~     (all equal)
CreateCipher/AES-128-16      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.000 n=10+10)
CreateCipher/AES-192-16      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.000 n=10+10)
CreateCipher/AES-256-16      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.000 n=10+10)

Change-Id: I0ea0b21cf84b11b6a5fc7c6ace144390eb55438b
Reviewed-on: https://go-review.googlesource.com/c/go/+/561080
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
src/crypto/aes/aes_gcm.go
src/crypto/aes/aes_test.go
src/crypto/aes/cipher.go
src/crypto/aes/cipher_asm.go
src/crypto/aes/gcm_ppc64x.go

index b95796970c86ad589fa04f564d457a7dff56f36d..d1e3fade71cc53d7609b9089b52dd4d0bf94ebe6 100644 (file)
@@ -45,7 +45,7 @@ var _ gcmAble = (*aesCipherGCM)(nil)
 // NewGCM returns the AES cipher wrapped in Galois Counter Mode. This is only
 // called by [crypto/cipher.NewGCM] via the gcmAble interface.
 func (c *aesCipherGCM) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
-       g := &gcmAsm{ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
+       g := &gcmAsm{ks: c.enc[:c.l], nonceSize: nonceSize, tagSize: tagSize}
        gcmAesInit(&g.productTable, g.ks)
        return g, nil
 }
index 1e8bac4bb5fc926b94add604aca3bf32f1fd617c..6035f16050cfe100b35ae1b827c26480ef294111 100644 (file)
@@ -345,7 +345,12 @@ func mustPanic(t *testing.T, msg string, f func()) {
 }
 
 func BenchmarkEncrypt(b *testing.B) {
-       tt := encryptTests[0]
+       b.Run("AES-128", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[1]) })
+       b.Run("AES-192", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[2]) })
+       b.Run("AES-256", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[3]) })
+}
+
+func benchmarkEncrypt(b *testing.B, tt CryptTest) {
        c, err := NewCipher(tt.key)
        if err != nil {
                b.Fatal("NewCipher:", err)
@@ -359,7 +364,12 @@ func BenchmarkEncrypt(b *testing.B) {
 }
 
 func BenchmarkDecrypt(b *testing.B) {
-       tt := encryptTests[0]
+       b.Run("AES-128", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[1]) })
+       b.Run("AES-192", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[2]) })
+       b.Run("AES-256", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[3]) })
+}
+
+func benchmarkDecrypt(b *testing.B, tt CryptTest) {
        c, err := NewCipher(tt.key)
        if err != nil {
                b.Fatal("NewCipher:", err)
@@ -373,11 +383,30 @@ func BenchmarkDecrypt(b *testing.B) {
 }
 
 func BenchmarkExpand(b *testing.B) {
-       tt := encryptTests[0]
-       n := len(tt.key) + 28
-       c := &aesCipher{make([]uint32, n), make([]uint32, n)}
+       b.Run("AES-128", func(b *testing.B) { benchmarkExpand(b, encryptTests[1]) })
+       b.Run("AES-192", func(b *testing.B) { benchmarkExpand(b, encryptTests[2]) })
+       b.Run("AES-256", func(b *testing.B) { benchmarkExpand(b, encryptTests[3]) })
+}
+
+func benchmarkExpand(b *testing.B, tt CryptTest) {
+       c := &aesCipher{l: uint8(len(tt.key) + 28)}
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
-               expandKey(tt.key, c.enc, c.dec)
+               expandKey(tt.key, c.enc[:c.l], c.dec[:c.l])
+       }
+}
+
+func BenchmarkCreateCipher(b *testing.B) {
+       b.Run("AES-128", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[1]) })
+       b.Run("AES-192", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[2]) })
+       b.Run("AES-256", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[3]) })
+}
+
+func benchmarkCreateCipher(b *testing.B, tt CryptTest) {
+       b.ReportAllocs()
+       for i := 0; i < b.N; i++ {
+               if _, err := NewCipher(tt.key); err != nil {
+                       b.Fatal(err)
+               }
        }
 }
index a9e6208696dae8f4eb692760fb60113f0c3585d9..cde2e45d2ca55962c9a1b58694ea0b6c7b1a37e7 100644 (file)
@@ -16,8 +16,9 @@ const BlockSize = 16
 
 // A cipher is an instance of AES encryption using a particular key.
 type aesCipher struct {
-       enc []uint32
-       dec []uint32
+       l   uint8 // only this length of the enc and dec array is actually used
+       enc [28 + 32]uint32
+       dec [28 + 32]uint32
 }
 
 type KeySizeError int
@@ -47,9 +48,8 @@ func NewCipher(key []byte) (cipher.Block, error) {
 // newCipherGeneric creates and returns a new cipher.Block
 // implemented in pure Go.
 func newCipherGeneric(key []byte) (cipher.Block, error) {
-       n := len(key) + 28
-       c := aesCipher{make([]uint32, n), make([]uint32, n)}
-       expandKeyGo(key, c.enc, c.dec)
+       c := aesCipher{l: uint8(len(key) + 28)}
+       expandKeyGo(key, c.enc[:c.l], c.dec[:c.l])
        return &c, nil
 }
 
@@ -65,7 +65,7 @@ func (c *aesCipher) Encrypt(dst, src []byte) {
        if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
                panic("crypto/aes: invalid buffer overlap")
        }
-       encryptBlockGo(c.enc, dst, src)
+       encryptBlockGo(c.enc[:c.l], dst, src)
 }
 
 func (c *aesCipher) Decrypt(dst, src []byte) {
@@ -78,5 +78,5 @@ func (c *aesCipher) Decrypt(dst, src []byte) {
        if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
                panic("crypto/aes: invalid buffer overlap")
        }
-       decryptBlockGo(c.dec, dst, src)
+       decryptBlockGo(c.dec[:c.l], dst, src)
 }
index 5451fa60d38677bedf2342671ba14b356b139eb1..3e5f589c2cdd0b50b1010ad27fe3d87c573b3f38 100644 (file)
@@ -44,8 +44,9 @@ func newCipher(key []byte) (cipher.Block, error) {
        if !supportsAES {
                return newCipherGeneric(key)
        }
-       n := len(key) + 28
-       c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
+       // Note that under certain circumstances, we only return the inner aesCipherAsm.
+       // This avoids an unnecessary allocation of the aesCipher struct.
+       c := aesCipherGCM{aesCipherAsm{aesCipher{l: uint8(len(key) + 28)}}}
        var rounds int
        switch len(key) {
        case 128 / 8:
@@ -60,9 +61,9 @@ func newCipher(key []byte) (cipher.Block, error) {
 
        expandKeyAsm(rounds, &key[0], &c.enc[0], &c.dec[0])
        if supportsAES && supportsGFMUL {
-               return &aesCipherGCM{c}, nil
+               return &c, nil
        }
-       return &c, nil
+       return &c.aesCipherAsm, nil
 }
 
 func (c *aesCipherAsm) BlockSize() int { return BlockSize }
@@ -78,7 +79,7 @@ func (c *aesCipherAsm) Encrypt(dst, src []byte) {
        if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
                panic("crypto/aes: invalid buffer overlap")
        }
-       encryptBlockAsm(len(c.enc)/4-1, &c.enc[0], &dst[0], &src[0])
+       encryptBlockAsm(int(c.l)/4-1, &c.enc[0], &dst[0], &src[0])
 }
 
 func (c *aesCipherAsm) Decrypt(dst, src []byte) {
@@ -92,7 +93,7 @@ func (c *aesCipherAsm) Decrypt(dst, src []byte) {
        if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
                panic("crypto/aes: invalid buffer overlap")
        }
-       decryptBlockAsm(len(c.dec)/4-1, &c.dec[0], &dst[0], &src[0])
+       decryptBlockAsm(int(c.l)/4-1, &c.dec[0], &dst[0], &src[0])
 }
 
 // expandKey is used by BenchmarkExpand to ensure that the asm implementation
index 04fac72459cbe253bf8f8833e0688dccc69a2bff..f206b476077d0998e6f917514fb560064502a463 100644 (file)
@@ -57,7 +57,7 @@ func counterCryptASM(nr int, out, in []byte, counter *[gcmBlockSize]byte, key *u
 // called by [crypto/cipher.NewGCM] via the gcmAble interface.
 func (c *aesCipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
        var h1, h2 uint64
-       g := &gcmAsm{cipher: c, ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
+       g := &gcmAsm{cipher: c, ks: c.enc[:c.l], nonceSize: nonceSize, tagSize: tagSize}
 
        hle := make([]byte, gcmBlockSize)