Skip to content

Commit 917e726

Browse files
zorrorffm authored and cherrymui committed
crypto/aes: optimize arm64 AES implementation
This patch makes use of arm64 AES instructions to accelerate AES computation. The optimization is only supported on Linux for arm64.

name        old time/op    new time/op    delta
Encrypt-32  255ns ± 0%     26ns ± 0%      -89.73%
Decrypt-32  256ns ± 0%     26ns ± 0%      -89.77%
Expand-32   990ns ± 5%     901ns ± 0%     -9.05%

name        old speed      new speed       delta
Encrypt-32  62.5MB/s ± 0%  610.4MB/s ± 0%  +876.39%
Decrypt-32  62.3MB/s ± 0%  610.2MB/s ± 0%  +879.6%

Fixes #18498

Change-Id: If416e5a151785325527b32ff72f6da3812493ed0
Reviewed-on: https://go-review.googlesource.com/64490
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
1 parent c4f3fe9 commit 917e726

File tree

3 files changed

+181
-1
lines changed

3 files changed

+181
-1
lines changed

src/crypto/aes/asm_arm64.s

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#include "textflag.h"
6+
7+
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// encryptBlockAsm encrypts the 16-byte block at src into dst with the arm64
// AES instructions. xk points at the first round key and nr picks the entry
// point: nr < 12 enters at enc128, nr == 12 at enc196, anything larger at
// enc256. The entry points fall through into one another, so the longer key
// sizes simply run two or four extra rounds before the shared enc128 tail.
//
// AESE XORs the round key into the state and then applies SubBytes and
// ShiftRows; AESMC applies MixColumns. Because AESE adds its key first, the
// last round key is applied by a separate VEOR.
8+
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
9+
MOVD nr+0(FP), R9 // R9 = nr
10+
MOVD xk+8(FP), R10 // R10 = cursor into the round keys, advanced by each VLD1.P
11+
MOVD dst+16(FP), R11 // R11 = dst
12+
MOVD src+24(FP), R12 // R12 = src
13+
14+
VLD1 (R12), [V0.B16] // V0 = input block; V0 holds the state for every round
15+
16+
CMP $12, R9 // select the entry point from nr
17+
BLT enc128 // nr < 12
18+
BEQ enc196 // nr == 12, which cipher_arm64.go produces for a 24-byte key (label says 196)
19+
enc256: // reached by fall-through when nr > 12: two extra rounds
20+
VLD1.P 32(R10), [V1.B16, V2.B16] // two round keys, R10 += 32
21+
AESE V1.B16, V0.B16
22+
AESMC V0.B16, V0.B16
23+
AESE V2.B16, V0.B16
24+
AESMC V0.B16, V0.B16
25+
enc196: // two more rounds, shared with the enc256 path
26+
VLD1.P 32(R10), [V3.B16, V4.B16] // two round keys, R10 += 32
27+
AESE V3.B16, V0.B16
28+
AESMC V0.B16, V0.B16
29+
AESE V4.B16, V0.B16
30+
AESMC V0.B16, V0.B16
31+
enc128: // common tail: the remaining 11 round keys (176 bytes) in V5..V15
32+
VLD1.P 64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
33+
VLD1.P 64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
34+
VLD1.P 48(R10), [V13.B16, V14.B16, V15.B16]
35+
AESE V5.B16, V0.B16 // nine AESE+AESMC rounds with V5..V13
36+
AESMC V0.B16, V0.B16
37+
AESE V6.B16, V0.B16
38+
AESMC V0.B16, V0.B16
39+
AESE V7.B16, V0.B16
40+
AESMC V0.B16, V0.B16
41+
AESE V8.B16, V0.B16
42+
AESMC V0.B16, V0.B16
43+
AESE V9.B16, V0.B16
44+
AESMC V0.B16, V0.B16
45+
AESE V10.B16, V0.B16
46+
AESMC V0.B16, V0.B16
47+
AESE V11.B16, V0.B16
48+
AESMC V0.B16, V0.B16
49+
AESE V12.B16, V0.B16
50+
AESMC V0.B16, V0.B16
51+
AESE V13.B16, V0.B16
52+
AESMC V0.B16, V0.B16
53+
AESE V14.B16, V0.B16 // final round: no AESMC follows
54+
VEOR V0.B16, V15.B16, V0.B16 // XOR in the last round key
55+
VST1 [V0.B16], (R11) // store the 16-byte result to dst
56+
RET
57+
58+
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// decryptBlockAsm decrypts the 16-byte block at src into dst with the arm64
// AES instructions. It mirrors encryptBlockAsm: xk points at the first round
// key of the decryption schedule and nr picks the entry point (nr < 12 enters
// at dec128, nr == 12 at dec196, anything larger at dec256), with each entry
// point falling through into the next.
//
// AESD XORs the round key into the state and then applies the inverse
// ShiftRows and SubBytes steps; AESIMC applies the inverse MixColumns. The
// last round key is applied by a separate VEOR.
58+
59+
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
60+
MOVD nr+0(FP), R9 // R9 = nr
61+
MOVD xk+8(FP), R10 // R10 = cursor into the round keys, advanced by each VLD1.P
62+
MOVD dst+16(FP), R11 // R11 = dst
63+
MOVD src+24(FP), R12 // R12 = src
64+
65+
VLD1 (R12), [V0.B16] // V0 = input block; V0 holds the state for every round
66+
67+
CMP $12, R9 // select the entry point from nr
68+
BLT dec128 // nr < 12
69+
BEQ dec196 // nr == 12, which cipher_arm64.go produces for a 24-byte key (label says 196)
70+
dec256: // reached by fall-through when nr > 12: two extra rounds
71+
VLD1.P 32(R10), [V1.B16, V2.B16] // two round keys, R10 += 32
72+
AESD V1.B16, V0.B16
73+
AESIMC V0.B16, V0.B16
74+
AESD V2.B16, V0.B16
75+
AESIMC V0.B16, V0.B16
76+
dec196: // two more rounds, shared with the dec256 path
77+
VLD1.P 32(R10), [V3.B16, V4.B16] // two round keys, R10 += 32
78+
AESD V3.B16, V0.B16
79+
AESIMC V0.B16, V0.B16
80+
AESD V4.B16, V0.B16
81+
AESIMC V0.B16, V0.B16
82+
dec128: // common tail: the remaining 11 round keys (176 bytes) in V5..V15
83+
VLD1.P 64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
84+
VLD1.P 64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
85+
VLD1.P 48(R10), [V13.B16, V14.B16, V15.B16]
86+
AESD V5.B16, V0.B16 // nine AESD+AESIMC rounds with V5..V13
87+
AESIMC V0.B16, V0.B16
88+
AESD V6.B16, V0.B16
89+
AESIMC V0.B16, V0.B16
90+
AESD V7.B16, V0.B16
91+
AESIMC V0.B16, V0.B16
92+
AESD V8.B16, V0.B16
93+
AESIMC V0.B16, V0.B16
94+
AESD V9.B16, V0.B16
95+
AESIMC V0.B16, V0.B16
96+
AESD V10.B16, V0.B16
97+
AESIMC V0.B16, V0.B16
98+
AESD V11.B16, V0.B16
99+
AESIMC V0.B16, V0.B16
100+
AESD V12.B16, V0.B16
101+
AESIMC V0.B16, V0.B16
102+
AESD V13.B16, V0.B16
103+
AESIMC V0.B16, V0.B16
104+
AESD V14.B16, V0.B16 // final round: no AESIMC follows
105+
VEOR V0.B16, V15.B16, V0.B16 // XOR in the last round key
106+
VST1 [V0.B16], (R11) // store the 16-byte result to dst
107+
RET

src/crypto/aes/cipher_arm64.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package aes
6+
7+
import (
8+
"crypto/cipher"
9+
"internal/cpu"
10+
"math/bits"
11+
)
12+
13+
// encryptBlockAsm and decryptBlockAsm are defined in asm_arm64.s.
// Each reads one 16-byte block from src and writes one to dst. xk points at
// the first word of the key schedule to use and nr selects how many rounds
// the assembly runs (it branches on nr < 12, nr == 12 and nr > 12).
14+
//go:noescape
15+
func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
16+
17+
//go:noescape
18+
func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
19+
20+
// aesCipherAsm is the cipher returned by newCipher when cpu.ARM64.HasAES is
// set. It embeds aesCipher for the enc and dec key schedules and routes
// Encrypt and Decrypt through the assembly routines.
type aesCipherAsm struct {
21+
aesCipher
22+
}
23+
24+
// newCipher returns a cipher.Block for key. Without cpu.ARM64.HasAES it
// defers to newCipherGeneric; otherwise it allocates two schedules of
// len(key)+28 words and fills them with arm64ExpandKey.
// NOTE(review): the key length is not validated here; presumably the caller
// has already rejected invalid sizes — confirm.
func newCipher(key []byte) (cipher.Block, error) {
25+
if !cpu.ARM64.HasAES {
26+
return newCipherGeneric(key)
27+
}
28+
n := len(key) + 28 // schedule length in words: 44, 52 or 60 for 16-, 24- or 32-byte keys
29+
c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
30+
arm64ExpandKey(key, c.enc, c.dec)
31+
return &c, nil
32+
}
33+
34+
// BlockSize returns the package-level BlockSize.
func (c *aesCipherAsm) BlockSize() int { return BlockSize }
35+
36+
// Encrypt encrypts the first block of src into dst using encryptBlockAsm and
// the c.enc schedule. It panics if src or dst is shorter than BlockSize.
func (c *aesCipherAsm) Encrypt(dst, src []byte) {
37+
if len(src) < BlockSize {
38+
panic("crypto/aes: input not full block")
39+
}
40+
if len(dst) < BlockSize {
41+
panic("crypto/aes: output not full block")
42+
}
43+
encryptBlockAsm(len(c.enc)/4-1, &c.enc[0], &dst[0], &src[0]) // nr = words/4 - 1: 10, 12 or 14 for the schedule sizes newCipher allocates
44+
}
45+
46+
// Decrypt decrypts the first block of src into dst using decryptBlockAsm and
// the c.dec schedule. It panics if src or dst is shorter than BlockSize.
func (c *aesCipherAsm) Decrypt(dst, src []byte) {
47+
if len(src) < BlockSize {
48+
panic("crypto/aes: input not full block")
49+
}
50+
if len(dst) < BlockSize {
51+
panic("crypto/aes: output not full block")
52+
}
53+
decryptBlockAsm(len(c.dec)/4-1, &c.dec[0], &dst[0], &src[0]) // nr = words/4 - 1: 10, 12 or 14 for the schedule sizes newCipher allocates
54+
}
55+
56+
// arm64ExpandKey fills enc and dec from key with expandKeyGo, then reverses
// the byte order of every 32-bit word of both schedules in place.
// NOTE(review): presumably the swap puts the round keys in the byte order the
// assembly's byte-wise VLD1 loads need — confirm.
// dec is indexed up to len(enc), so it must be at least as long as enc.
func arm64ExpandKey(key []byte, enc, dec []uint32) {
57+
expandKeyGo(key, enc, dec)
58+
nk := len(enc)
59+
for i := 0; i < nk; i++ {
60+
enc[i] = bits.ReverseBytes32(enc[i])
61+
dec[i] = bits.ReverseBytes32(dec[i])
62+
}
63+
}
64+
65+
// expandKey is used by BenchmarkExpand so that the benchmark exercises the
66+
// arm64 key expansion (arm64ExpandKey) when cpu.ARM64.HasAES is set, and expandKeyGo otherwise.
67+
func expandKey(key []byte, enc, dec []uint32) {
68+
if cpu.ARM64.HasAES {
69+
arm64ExpandKey(key, enc, dec)
70+
} else {
71+
expandKeyGo(key, enc, dec)
72+
}
73+
}

src/crypto/aes/cipher_generic.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
// +build !amd64,!s390x,!ppc64le
5+
// +build !amd64,!s390x,!ppc64le,!arm64
66

77
package aes
88

0 commit comments

Comments
 (0)