Skip to content

Commit 2c150f4

Browse files
marten-seemann authored and neild committed
crypto/aes: speed up AES by reducing allocations
By embedding the arrays into aesCipher directly, we can save a few allocations when creating a new AES cipher. This also avoids a lot of pointer chasing when encrypting and decrypting, leading to 3-4% faster performance.

Fixes #65507.

name                     old time/op    new time/op    delta
Encrypt/AES-128-16       4.70ns ± 3%    4.46ns ± 2%    -5.08%  (p=0.000 n=10+10)
Encrypt/AES-192-16       4.90ns ± 3%    4.71ns ± 2%    -3.98%  (p=0.000 n=10+10)
Encrypt/AES-256-16       5.18ns ± 3%    4.91ns ± 1%    -5.15%  (p=0.000 n=10+10)
Decrypt/AES-128-16       4.51ns ± 2%    4.37ns ± 1%    -3.01%  (p=0.000 n=10+10)
Decrypt/AES-192-16       4.77ns ± 2%    4.63ns ± 2%    -3.05%  (p=0.000 n=10+10)
Decrypt/AES-256-16       5.10ns ± 2%    4.93ns ± 3%    -3.27%  (p=0.000 n=10+10)
Expand/AES-128-16        52.5ns ± 4%    55.9ns ± 3%    +6.58%  (p=0.000 n=10+10)
Expand/AES-192-16        45.6ns ± 5%    48.1ns ± 2%    +5.56%  (p=0.000 n=10+10)
Expand/AES-256-16        69.2ns ± 5%    71.7ns ± 3%    +3.61%  (p=0.006 n=9+10)
CreateCipher/AES-128-16   130ns ± 3%      84ns ± 3%   -35.13%  (p=0.000 n=10+10)
CreateCipher/AES-192-16   128ns ± 2%      78ns ± 3%   -38.82%  (p=0.000 n=10+10)
CreateCipher/AES-256-16   156ns ± 3%     105ns ± 4%   -32.33%  (p=0.000 n=10+10)

name                     old speed      new speed      delta
Encrypt/AES-128-16       3.40GB/s ± 2%  3.59GB/s ± 2%  +5.35%  (p=0.000 n=10+10)
Encrypt/AES-192-16       3.27GB/s ± 3%  3.40GB/s ± 2%  +4.13%  (p=0.000 n=10+10)
Encrypt/AES-256-16       3.09GB/s ± 2%  3.26GB/s ± 1%  +5.42%  (p=0.000 n=10+10)
Decrypt/AES-128-16       3.55GB/s ± 2%  3.66GB/s ± 1%  +3.09%  (p=0.000 n=10+10)
Decrypt/AES-192-16       3.35GB/s ± 2%  3.46GB/s ± 2%  +3.14%  (p=0.000 n=10+10)
Decrypt/AES-256-16       3.14GB/s ± 2%  3.24GB/s ± 3%  +3.39%  (p=0.000 n=10+10)

name                     old alloc/op   new alloc/op   delta
Encrypt/AES-128-16       0.00B          0.00B          ~       (all equal)
Encrypt/AES-192-16       0.00B          0.00B          ~       (all equal)
Encrypt/AES-256-16       0.00B          0.00B          ~       (all equal)
Decrypt/AES-128-16       0.00B          0.00B          ~       (all equal)
Decrypt/AES-192-16       0.00B          0.00B          ~       (all equal)
Decrypt/AES-256-16       0.00B          0.00B          ~       (all equal)
Expand/AES-128-16        0.00B          0.00B          ~       (all equal)
Expand/AES-192-16        0.00B          0.00B          ~       (all equal)
Expand/AES-256-16        0.00B          0.00B          ~       (all equal)
CreateCipher/AES-128-16  448B ± 0%      512B ± 0%      +14.29%  (p=0.000 n=10+10)
CreateCipher/AES-192-16  512B ± 0%      512B ± 0%      ~       (all equal)
CreateCipher/AES-256-16  576B ± 0%      512B ± 0%      -11.11%  (p=0.000 n=10+10)

name                     old allocs/op  new allocs/op  delta
Encrypt/AES-128-16       0.00           0.00           ~       (all equal)
Encrypt/AES-192-16       0.00           0.00           ~       (all equal)
Encrypt/AES-256-16       0.00           0.00           ~       (all equal)
Decrypt/AES-128-16       0.00           0.00           ~       (all equal)
Decrypt/AES-192-16       0.00           0.00           ~       (all equal)
Decrypt/AES-256-16       0.00           0.00           ~       (all equal)
Expand/AES-128-16        0.00           0.00           ~       (all equal)
Expand/AES-192-16        0.00           0.00           ~       (all equal)
Expand/AES-256-16        0.00           0.00           ~       (all equal)
CreateCipher/AES-128-16  4.00 ± 0%      1.00 ± 0%      -75.00%  (p=0.000 n=10+10)
CreateCipher/AES-192-16  4.00 ± 0%      1.00 ± 0%      -75.00%  (p=0.000 n=10+10)
CreateCipher/AES-256-16  4.00 ± 0%      1.00 ± 0%      -75.00%  (p=0.000 n=10+10)

Change-Id: I0ea0b21cf84b11b6a5fc7c6ace144390eb55438b
Reviewed-on: https://go-review.googlesource.com/c/go/+/561080
Reviewed-by: Damien Neil <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Jorropo <[email protected]>
Reviewed-by: Roland Shoemaker <[email protected]>
1 parent c4792e6 commit 2c150f4

File tree

5 files changed

+51
-21
lines changed

5 files changed

+51
-21
lines changed

src/crypto/aes/aes_gcm.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ var _ gcmAble = (*aesCipherGCM)(nil)
4545
// NewGCM returns the AES cipher wrapped in Galois Counter Mode. This is only
4646
// called by [crypto/cipher.NewGCM] via the gcmAble interface.
4747
func (c *aesCipherGCM) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
48-
g := &gcmAsm{ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
48+
g := &gcmAsm{ks: c.enc[:c.l], nonceSize: nonceSize, tagSize: tagSize}
4949
gcmAesInit(&g.productTable, g.ks)
5050
return g, nil
5151
}

src/crypto/aes/aes_test.go

+35-6
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,12 @@ func mustPanic(t *testing.T, msg string, f func()) {
345345
}
346346

347347
func BenchmarkEncrypt(b *testing.B) {
348-
tt := encryptTests[0]
348+
b.Run("AES-128", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[1]) })
349+
b.Run("AES-192", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[2]) })
350+
b.Run("AES-256", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[3]) })
351+
}
352+
353+
func benchmarkEncrypt(b *testing.B, tt CryptTest) {
349354
c, err := NewCipher(tt.key)
350355
if err != nil {
351356
b.Fatal("NewCipher:", err)
@@ -359,7 +364,12 @@ func BenchmarkEncrypt(b *testing.B) {
359364
}
360365

361366
func BenchmarkDecrypt(b *testing.B) {
362-
tt := encryptTests[0]
367+
b.Run("AES-128", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[1]) })
368+
b.Run("AES-192", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[2]) })
369+
b.Run("AES-256", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[3]) })
370+
}
371+
372+
func benchmarkDecrypt(b *testing.B, tt CryptTest) {
363373
c, err := NewCipher(tt.key)
364374
if err != nil {
365375
b.Fatal("NewCipher:", err)
@@ -373,11 +383,30 @@ func BenchmarkDecrypt(b *testing.B) {
373383
}
374384

375385
func BenchmarkExpand(b *testing.B) {
376-
tt := encryptTests[0]
377-
n := len(tt.key) + 28
378-
c := &aesCipher{make([]uint32, n), make([]uint32, n)}
386+
b.Run("AES-128", func(b *testing.B) { benchmarkExpand(b, encryptTests[1]) })
387+
b.Run("AES-192", func(b *testing.B) { benchmarkExpand(b, encryptTests[2]) })
388+
b.Run("AES-256", func(b *testing.B) { benchmarkExpand(b, encryptTests[3]) })
389+
}
390+
391+
func benchmarkExpand(b *testing.B, tt CryptTest) {
392+
c := &aesCipher{l: uint8(len(tt.key) + 28)}
379393
b.ResetTimer()
380394
for i := 0; i < b.N; i++ {
381-
expandKey(tt.key, c.enc, c.dec)
395+
expandKey(tt.key, c.enc[:c.l], c.dec[:c.l])
396+
}
397+
}
398+
399+
func BenchmarkCreateCipher(b *testing.B) {
400+
b.Run("AES-128", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[1]) })
401+
b.Run("AES-192", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[2]) })
402+
b.Run("AES-256", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[3]) })
403+
}
404+
405+
func benchmarkCreateCipher(b *testing.B, tt CryptTest) {
406+
b.ReportAllocs()
407+
for i := 0; i < b.N; i++ {
408+
if _, err := NewCipher(tt.key); err != nil {
409+
b.Fatal(err)
410+
}
382411
}
383412
}

src/crypto/aes/cipher.go

+7-7
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,9 @@ const BlockSize = 16
1616

1717
// A cipher is an instance of AES encryption using a particular key.
1818
type aesCipher struct {
19-
enc []uint32
20-
dec []uint32
19+
l uint8 // only this length of the enc and dec array is actually used
20+
enc [28 + 32]uint32
21+
dec [28 + 32]uint32
2122
}
2223

2324
type KeySizeError int
@@ -47,9 +48,8 @@ func NewCipher(key []byte) (cipher.Block, error) {
4748
// newCipherGeneric creates and returns a new cipher.Block
4849
// implemented in pure Go.
4950
func newCipherGeneric(key []byte) (cipher.Block, error) {
50-
n := len(key) + 28
51-
c := aesCipher{make([]uint32, n), make([]uint32, n)}
52-
expandKeyGo(key, c.enc, c.dec)
51+
c := aesCipher{l: uint8(len(key) + 28)}
52+
expandKeyGo(key, c.enc[:c.l], c.dec[:c.l])
5353
return &c, nil
5454
}
5555

@@ -65,7 +65,7 @@ func (c *aesCipher) Encrypt(dst, src []byte) {
6565
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
6666
panic("crypto/aes: invalid buffer overlap")
6767
}
68-
encryptBlockGo(c.enc, dst, src)
68+
encryptBlockGo(c.enc[:c.l], dst, src)
6969
}
7070

7171
func (c *aesCipher) Decrypt(dst, src []byte) {
@@ -78,5 +78,5 @@ func (c *aesCipher) Decrypt(dst, src []byte) {
7878
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
7979
panic("crypto/aes: invalid buffer overlap")
8080
}
81-
decryptBlockGo(c.dec, dst, src)
81+
decryptBlockGo(c.dec[:c.l], dst, src)
8282
}

src/crypto/aes/cipher_asm.go

+7-6
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,9 @@ func newCipher(key []byte) (cipher.Block, error) {
4444
if !supportsAES {
4545
return newCipherGeneric(key)
4646
}
47-
n := len(key) + 28
48-
c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
47+
// Note that under certain circumstances, we only return the inner aesCipherAsm.
48+
// This avoids an unnecessary allocation of the aesCipher struct.
49+
c := aesCipherGCM{aesCipherAsm{aesCipher{l: uint8(len(key) + 28)}}}
4950
var rounds int
5051
switch len(key) {
5152
case 128 / 8:
@@ -60,9 +61,9 @@ func newCipher(key []byte) (cipher.Block, error) {
6061

6162
expandKeyAsm(rounds, &key[0], &c.enc[0], &c.dec[0])
6263
if supportsAES && supportsGFMUL {
63-
return &aesCipherGCM{c}, nil
64+
return &c, nil
6465
}
65-
return &c, nil
66+
return &c.aesCipherAsm, nil
6667
}
6768

6869
func (c *aesCipherAsm) BlockSize() int { return BlockSize }
@@ -78,7 +79,7 @@ func (c *aesCipherAsm) Encrypt(dst, src []byte) {
7879
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
7980
panic("crypto/aes: invalid buffer overlap")
8081
}
81-
encryptBlockAsm(len(c.enc)/4-1, &c.enc[0], &dst[0], &src[0])
82+
encryptBlockAsm(int(c.l)/4-1, &c.enc[0], &dst[0], &src[0])
8283
}
8384

8485
func (c *aesCipherAsm) Decrypt(dst, src []byte) {
@@ -92,7 +93,7 @@ func (c *aesCipherAsm) Decrypt(dst, src []byte) {
9293
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
9394
panic("crypto/aes: invalid buffer overlap")
9495
}
95-
decryptBlockAsm(len(c.dec)/4-1, &c.dec[0], &dst[0], &src[0])
96+
decryptBlockAsm(int(c.l)/4-1, &c.dec[0], &dst[0], &src[0])
9697
}
9798

9899
// expandKey is used by BenchmarkExpand to ensure that the asm implementation

src/crypto/aes/gcm_ppc64x.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ func counterCryptASM(nr int, out, in []byte, counter *[gcmBlockSize]byte, key *u
5757
// called by [crypto/cipher.NewGCM] via the gcmAble interface.
5858
func (c *aesCipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
5959
var h1, h2 uint64
60-
g := &gcmAsm{cipher: c, ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
60+
g := &gcmAsm{cipher: c, ks: c.enc[:c.l], nonceSize: nonceSize, tagSize: tagSize}
6161

6262
hle := make([]byte, gcmBlockSize)
6363

0 commit comments

Comments
 (0)