Skip to content

Commit a8ca649

Browse files
committed
crypto/aes,crypto/cipher: improve gcm performance on ppc64x
This improves performance for AES-GCM. The function counterCrypt is written in assembler so the loop can be unrolled and the stitched approach used for the encryption. This implementation works on ppc64le and ppc64. The use of GOPPC64=power9 generates the best performance, goos: linux goarch: ppc64le pkg: crypto/cipher cpu: POWER10 │ gcmx8.cpu1.out │ gcmx8.new.cpu1.out │ │ sec/op │ sec/op vs base │ AESGCM/Open-128-64 180.5n ± 0% 152.7n ± 1% -15.43% (p=0.000 n=8) AESGCM/Seal-128-64 166.8n ± 0% 147.2n ± 0% -11.72% (p=0.000 n=8) AESGCM/Open-256-64 194.9n ± 0% 156.6n ± 1% -19.65% (p=0.000 n=8) AESGCM/Seal-256-64 183.7n ± 0% 157.0n ± 0% -14.51% (p=0.000 n=8) AESGCM/Open-128-1350 1769.5n ± 0% 454.5n ± 0% -74.31% (p=0.000 n=8) AESGCM/Seal-128-1350 1759.0n ± 0% 453.7n ± 0% -74.21% (p=0.000 n=8) AESGCM/Open-256-1350 2104.0n ± 0% 504.4n ± 1% -76.03% (p=0.000 n=8) AESGCM/Seal-256-1350 2092.0n ± 0% 503.0n ± 0% -75.96% (p=0.000 n=8) AESGCM/Open-128-8192 10.054µ ± 0% 1.961µ ± 0% -80.50% (p=0.000 n=8) AESGCM/Seal-128-8192 10.050µ ± 0% 1.965µ ± 0% -80.45% (p=0.000 n=8) AESGCM/Open-256-8192 12.080µ ± 0% 2.232µ ± 0% -81.52% (p=0.000 n=8) AESGCM/Seal-256-8192 12.069µ ± 0% 2.238µ ± 0% -81.46% (p=0.000 n=8) geomean 1.566µ 535.5n -65.80% │ gcmx8.cpu1.out │ gcmx8.new.cpu1.out │ │ B/s │ B/s vs base │ AESGCM/Open-128-64 338.1Mi ± 0% 399.8Mi ± 1% +18.27% (p=0.000 n=8) AESGCM/Seal-128-64 366.1Mi ± 0% 414.6Mi ± 0% +13.28% (p=0.000 n=8) AESGCM/Open-256-64 313.1Mi ± 0% 389.7Mi ± 0% +24.47% (p=0.000 n=8) AESGCM/Seal-256-64 332.3Mi ± 0% 388.5Mi ± 0% +16.93% (p=0.000 n=8) AESGCM/Open-128-1350 727.6Mi ± 0% 2832.8Mi ± 0% +289.33% (p=0.000 n=8) AESGCM/Seal-128-1350 732.0Mi ± 0% 2837.8Mi ± 0% +287.70% (p=0.000 n=8) AESGCM/Open-256-1350 611.9Mi ± 0% 2552.6Mi ± 0% +317.18% (p=0.000 n=8) AESGCM/Seal-256-1350 615.3Mi ± 0% 2559.6Mi ± 0% +315.97% (p=0.000 n=8) AESGCM/Open-128-8192 777.1Mi ± 0% 3983.5Mi ± 0% +412.63% (p=0.000 n=8) AESGCM/Seal-128-8192 777.3Mi ± 0% 3975.9Mi ± 0% +411.47% (p=0.000 n=8) 
AESGCM/Open-256-8192 646.7Mi ± 0% 3500.6Mi ± 0% +441.27% (p=0.000 n=8) AESGCM/Seal-256-8192 647.3Mi ± 0% 3491.1Mi ± 0% +439.30% (p=0.000 n=8) geomean 542.7Mi 1.550Gi +192.42% Change-Id: I3600831a263ec8a99b5e3bdd495eb36e966d8075 Reviewed-on: https://go-review.googlesource.com/c/go/+/484575 Reviewed-by: Roland Shoemaker <[email protected]> TryBot-Result: Gopher Robot <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Run-TryBot: Lynn Boger <[email protected]> Reviewed-by: Paul Murphy <[email protected]> Reviewed-by: Than McIntosh <[email protected]>
1 parent e0948d8 commit a8ca649

File tree

2 files changed

+521
-30
lines changed

2 files changed

+521
-30
lines changed

src/crypto/aes/gcm_ppc64x.go

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ type gcmAsm struct {
5151
tagSize int
5252
}
5353

54+
// counterCryptASM is declared without a Go body; it is the routine that
// gcmAsm.counterCrypt calls to do its work. counterCrypt forwards its own
// out, in and counter arguments unchanged, passes len(enc)/4-1 as nr, and
// passes a pointer to the first word of the cipher's enc slice as key.
// NOTE(review): nr is presumably the AES round count and key the expanded
// encryption key schedule — confirm against the assembly implementation.
func counterCryptASM(nr int, out, in []byte, counter *[gcmBlockSize]byte, key *uint32)
55+
5456
// NewGCM returns the AES cipher wrapped in Galois Counter Mode. This is only
5557
// called by crypto/cipher.NewGCM via the gcmAble interface.
5658
func (c *aesCipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
@@ -114,34 +116,10 @@ func (g *gcmAsm) deriveCounter(counter *[gcmBlockSize]byte, nonce []byte) {
114116
// into out. counter is the initial count value and will be updated with the next
115117
// count value. The length of out must be greater than or equal to the length
116118
// of in.
119+
// counterCryptASM implements counterCrypt in assembly, which allows the loop to
120+
// be unrolled and optimized.
117121
func (g *gcmAsm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) {
118-
var mask [gcmBlockSize]byte
119-
120-
for len(in) >= gcmBlockSize {
121-
// Hint to avoid bounds check
122-
_, _ = in[15], out[15]
123-
g.cipher.Encrypt(mask[:], counter[:])
124-
gcmInc32(counter)
125-
126-
// XOR 16 bytes each loop iteration in 8 byte chunks
127-
in0 := binary.LittleEndian.Uint64(in[0:])
128-
in1 := binary.LittleEndian.Uint64(in[8:])
129-
m0 := binary.LittleEndian.Uint64(mask[:8])
130-
m1 := binary.LittleEndian.Uint64(mask[8:])
131-
binary.LittleEndian.PutUint64(out[:8], in0^m0)
132-
binary.LittleEndian.PutUint64(out[8:], in1^m1)
133-
out = out[16:]
134-
in = in[16:]
135-
}
136-
137-
if len(in) > 0 {
138-
g.cipher.Encrypt(mask[:], counter[:])
139-
gcmInc32(counter)
140-
// XOR leftover bytes
141-
for i, inb := range in {
142-
out[i] = inb ^ mask[i]
143-
}
144-
}
122+
// Hand the whole operation to counterCryptASM. The first argument is
// len(g.cipher.enc)/4-1 and the last is a pointer to the first word of
// g.cipher.enc; out, in and counter are passed through unchanged.
// NOTE(review): len(enc)/4-1 is presumably the AES round count (four
// 32-bit words per round key, one more round key than rounds) — confirm.
counterCryptASM(len(g.cipher.enc)/4-1, out, in, counter, &g.cipher.enc[0])
145123
}
146124

147125
// increments the rightmost 32 bits of the count value by 1.

0 commit comments

Comments
 (0)