Skip to content

Commit 3730814

Browse files
committed
crypto/internal/bigmod: optimize addMulVVW on Wasm
The current implementation of addMulVVW makes heavy use of
64x64->128 bit multiplications and 64-bit add-with-carry, which
are compiler intrinsics and are very efficient on many
architectures. However, those are not supported on Wasm. Here we
implement it with 32x32->64 bit operations, which is more
efficient on Wasm.

crypto/rsa benchmarks with Node:

                     │   old.txt    │              new.txt               │
                     │    sec/op    │   sec/op     vs base               │
DecryptPKCS1v15/2048   7.726m ± 1%   4.895m ± 2%  -36.65% (p=0.000 n=35)
DecryptPKCS1v15/3072   23.52m ± 1%   15.33m ± 1%  -34.83% (p=0.000 n=35)
DecryptPKCS1v15/4096   52.64m ± 2%   35.40m ± 1%  -32.75% (p=0.000 n=35)
EncryptPKCS1v15/2048   264.2µ ± 1%   176.9µ ± 1%  -33.02% (p=0.000 n=35)
DecryptOAEP/2048       7.608m ± 1%   4.911m ± 1%  -35.45% (p=0.000 n=35)
EncryptOAEP/2048       266.2µ ± 0%   183.3µ ± 2%  -31.15% (p=0.000 n=35)
SignPKCS1v15/2048      7.836m ± 1%   5.009m ± 2%  -36.08% (p=0.000 n=35)
VerifyPKCS1v15/2048    262.9µ ± 1%   176.3µ ± 1%  -32.94% (p=0.000 n=35)
SignPSS/2048           7.814m ± 0%   5.020m ± 1%  -35.76% (p=0.000 n=35)
VerifyPSS/2048         267.0µ ± 1%   183.8µ ± 1%  -31.17% (p=0.000 n=35)
geomean                2.718m        1.794m       -34.01%

With wazero:

                     │   old.txt    │              new.txt               │
                     │    sec/op    │   sec/op     vs base               │
DecryptPKCS1v15/2048   13.445m ± 0%   6.528m ± 0%  -51.45% (p=0.000 n=25)
DecryptPKCS1v15/3072   41.07m ± 0%    18.85m ± 0%  -54.10% (p=0.000 n=25)
DecryptPKCS1v15/4096   91.84m ± 1%    39.66m ± 0%  -56.81% (p=0.000 n=25)
EncryptPKCS1v15/2048   461.3µ ± 0%    197.2µ ± 0%  -57.25% (p=0.000 n=25)
DecryptOAEP/2048       13.438m ± 0%   6.577m ± 0%  -51.06% (p=0.000 n=25)
EncryptOAEP/2048       471.5µ ± 0%    207.7µ ± 0%  -55.95% (p=0.000 n=25)
SignPKCS1v15/2048      13.739m ± 0%   6.687m ± 0%  -51.33% (p=0.000 n=25)
VerifyPKCS1v15/2048    461.3µ ± 1%    196.8µ ± 0%  -57.35% (p=0.000 n=25)
SignPSS/2048           13.765m ± 0%   6.686m ± 0%  -51.43% (p=0.000 n=25)
VerifyPSS/2048         470.8µ ± 0%    208.9µ ± 1%  -55.64% (p=0.000 n=25)
geomean                4.769m         2.179m       -54.31%

Change-Id: I97f37d8cf1e3e9756a4e03ab4e681bf04152925f
Reviewed-on: https://go-review.googlesource.com/c/go/+/626957
Reviewed-by: David Chase <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 4ffa2ae commit 3730814

File tree

3 files changed

+72
-19
lines changed

3 files changed

+72
-19
lines changed

src/crypto/internal/bigmod/nat.go

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -688,25 +688,6 @@ func (x *Nat) montgomeryMul(a *Nat, b *Nat, m *Modulus) *Nat {
688688
return x
689689
}
690690

691-
// addMulVVW multiplies the multi-word value x by the single-word value y,
// adding the result to the multi-word value z and returning the final carry.
// It can be thought of as one row of a pen-and-paper column multiplication.
//
// Only the first len(z) words of x are read, so x must be at least as long
// as z; a shorter x panics. An empty z is a no-op that returns a zero carry.
func addMulVVW(z, x []uint, y uint) (carry uint) {
	if len(z) == 0 {
		// Nothing to accumulate. Returning here also keeps the hint below
		// from evaluating x[-1], which would panic.
		return 0
	}
	_ = x[len(z)-1] // bounds check elimination hint
	for i := range z {
		// hi:lo = x[i]*y + z[i] + carry, which always fits in two words.
		hi, lo := bits.Mul(x[i], y)
		lo, c := bits.Add(lo, z[i], 0)
		// We use bits.Add with zero to get an add-with-carry instruction that
		// absorbs the carry from the previous bits.Add.
		hi, _ = bits.Add(hi, 0, c)
		lo, c = bits.Add(lo, carry, 0)
		hi, _ = bits.Add(hi, 0, c)
		carry = hi
		z[i] = lo
	}
	return carry
}
709-
710691
// Mul calculates x = x * y mod m.
711692
//
712693
// The length of both operands must be the same as the modulus. Both operands
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build !wasm
6+
7+
package bigmod
8+
9+
import "math/bits"
10+
11+
// addMulVVW multiplies the multi-word value x by the single-word value y,
// adding the result to the multi-word value z and returning the final carry.
// It can be thought of as one row of a pen-and-paper column multiplication.
//
// Only the first len(z) words of x are read, so x must be at least as long
// as z; a shorter x panics. An empty z is a no-op that returns a zero carry.
func addMulVVW(z, x []uint, y uint) (carry uint) {
	if len(z) == 0 {
		// Nothing to accumulate. Returning here also keeps the hint below
		// from evaluating x[-1], which would panic.
		return 0
	}
	_ = x[len(z)-1] // bounds check elimination hint
	for i := range z {
		// hi:lo = x[i]*y + z[i] + carry, which always fits in two words.
		hi, lo := bits.Mul(x[i], y)
		lo, c := bits.Add(lo, z[i], 0)
		// We use bits.Add with zero to get an add-with-carry instruction that
		// absorbs the carry from the previous bits.Add.
		hi, _ = bits.Add(hi, 0, c)
		lo, c = bits.Add(lo, carry, 0)
		hi, _ = bits.Add(hi, 0, c)
		carry = hi
		z[i] = lo
	}
	return carry
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package bigmod
6+
7+
// The generic implementation relies on 64x64->128 bit multiplication and
8+
// 64-bit add-with-carry, which are compiler intrinsics on many architectures.
9+
// Wasm doesn't support those. Here we implement it with 32x32->64 bit
10+
// operations, which is more efficient on Wasm.
11+
12+
// addMulVVW multiplies the multi-word value x by the single-word value y,
// adding the result to the multi-word value z and returning the final carry.
// It can be thought of as one row of a pen-and-paper column multiplication.
//
// Every word is split into 32-bit halves and combined using only 64-bit
// multiplies and adds, so the code relies on uint being 64 bits wide.
//
// Only the first len(z) words of x are read, so x must be at least as long
// as z; a shorter x panics. An empty z is a no-op that returns a zero carry.
func addMulVVW(z, x []uint, y uint) (carry uint) {
	if len(z) == 0 {
		// Nothing to accumulate. Returning here also keeps the hint below
		// from evaluating x[-1], which would panic.
		return 0
	}
	const mask32 = 1<<32 - 1
	y0 := y & mask32
	y1 := y >> 32
	_ = x[len(z)-1] // bounds check elimination hint
	for i, zi := range z {
		xi := x[i]
		x0 := xi & mask32
		x1 := xi >> 32
		z0 := zi & mask32
		z1 := zi >> 32
		c0 := carry & mask32
		c1 := carry >> 32

		// Each partial sum below is one 32x32 product plus two 32-bit
		// values, at most (2^32-1)^2 + 2*(2^32-1) = 2^64-1, so none of
		// them can overflow a 64-bit word.

		// Bits 0..31 of the result, with h00 carried into the next column.
		w00 := x0*y0 + z0 + c0
		l00 := w00 & mask32
		h00 := w00 >> 32

		// First cross term for bits 32..63.
		w01 := x0*y1 + z1 + h00
		l01 := w01 & mask32
		h01 := w01 >> 32

		// Second cross term; the low half of w10 is bits 32..63 of the
		// result.
		w10 := x1*y0 + c1 + l01
		h10 := w10 >> 32

		// The high word: the top product plus both cross-term carries.
		carry = x1*y1 + h10 + h01
		// The shift discards the high half of w10, already counted in h10.
		z[i] = w10<<32 + l00
	}
	return carry
}

0 commit comments

Comments
 (0)