Skip to content

Commit fe24837

Browse files
committed
math/big: add fast path for pure Go addVW for large z
In the normal case, only a few words have to be updated when adding a word to a vector. When that happens, we can simply copy the rest of the words, which is much faster. However, the overhead of that makes it prohibitive for small vectors, so we check the size at the beginning.

The implementation is a bit weird to allow addVW to continue to be inlined; see #30548.

The AddVW benchmarks are surprising, but fully repeatable. The SubVW benchmarks are more or less as expected. I expect that removing the indirect function call will help both and make them a bit more normal.

name             old time/op  new time/op  delta
AddVW/1-8        4.27ns ± 2%  3.81ns ± 3%  -10.83%  (p=0.000 n=89+90)
AddVW/2-8        4.91ns ± 2%  4.34ns ± 1%  -11.60%  (p=0.000 n=83+90)
AddVW/3-8        5.77ns ± 4%  5.76ns ± 2%     ~     (p=0.365 n=91+87)
AddVW/4-8        6.03ns ± 1%  6.03ns ± 1%     ~     (p=0.392 n=80+76)
AddVW/5-8        6.48ns ± 2%  6.63ns ± 1%   +2.27%  (p=0.000 n=76+74)
AddVW/10-8       9.56ns ± 2%  9.56ns ± 1%   -0.02%  (p=0.002 n=69+76)
AddVW/100-8      90.6ns ± 0%  18.1ns ± 4%  -79.99%  (p=0.000 n=72+94)
AddVW/1000-8      865ns ± 0%    85ns ± 6%  -90.14%  (p=0.000 n=66+96)
AddVW/10000-8    8.57µs ± 2%  1.82µs ± 3%  -78.73%  (p=0.000 n=99+94)
AddVW/100000-8   84.4µs ± 2%  31.8µs ± 4%  -62.29%  (p=0.000 n=93+98)

name             old time/op  new time/op  delta
SubVW/1-8        3.90ns ± 2%  4.13ns ± 4%   +6.02%  (p=0.000 n=92+95)
SubVW/2-8        4.15ns ± 1%  5.20ns ± 1%  +25.22%  (p=0.000 n=83+85)
SubVW/3-8        5.50ns ± 2%  6.22ns ± 6%  +13.21%  (p=0.000 n=91+97)
SubVW/4-8        5.99ns ± 1%  6.63ns ± 1%  +10.63%  (p=0.000 n=79+61)
SubVW/5-8        6.75ns ± 4%  6.88ns ± 2%   +1.82%  (p=0.000 n=98+73)
SubVW/10-8       9.57ns ± 1%  9.56ns ± 1%   -0.13%  (p=0.000 n=77+64)
SubVW/100-8      90.3ns ± 1%  18.1ns ± 2%  -80.00%  (p=0.000 n=75+94)
SubVW/1000-8      860ns ± 4%    85ns ± 7%  -90.14%  (p=0.000 n=97+99)
SubVW/10000-8    8.51µs ± 3%  1.77µs ± 6%  -79.21%  (p=0.000 n=100+97)
SubVW/100000-8   84.4µs ± 3%  31.5µs ± 3%  -62.66%  (p=0.000 n=92+92)

Change-Id: I721d7031d40f245b4a284f5bdd93e7bb85e7e937
Reviewed-on: https://go-review.googlesource.com/c/go/+/164968
Run-TryBot: Josh Bleecher Snyder <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Robert Griesemer <[email protected]>
1 parent 4c227a0 commit fe24837

File tree

2 files changed

+54
-4
lines changed

2 files changed

+54
-4
lines changed

src/math/big/arith.go

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
// license that can be found in the LICENSE file.
44

55
// This file provides Go implementations of elementary multi-precision
6-
// arithmetic operations on word vectors. Needed for platforms without
7-
// assembly implementations of these routines.
6+
// arithmetic operations on word vectors. These have the suffix _g.
7+
// These are needed for platforms without assembly implementations of these routines.
8+
// This file also contains elementary operations that can be implemented
9+
// sufficiently efficiently in Go.
810

911
package big
1012

@@ -98,6 +100,28 @@ func addVW_g(z, x []Word, y Word) (c Word) {
98100
return
99101
}
100102

103+
// addVWlarge is addVW, but intended for large z.
104+
// The only difference is that we check on every iteration
105+
// whether we are done with carries,
106+
// and if so, switch to a much faster copy instead.
107+
// This is only a good idea for large z,
108+
// because the overhead of the check and the function call
109+
// outweighs the benefits when z is small.
110+
func addVWlarge(z, x []Word, y Word) (c Word) {
111+
c = y // y is fed in as the initial carry into word 0
112+
// The comment near the top of this file discusses this for loop condition.
113+
for i := 0; i < len(z) && i < len(x); i++ {
114+
if c == 0 { // no carry left: the remaining words of z are just those of x
115+
copy(z[i:], x[i:])
116+
return // c is 0 on this path
117+
}
118+
zi, cc := bits.Add(uint(x[i]), uint(c), 0) // zi = x[i] + c, cc = carry out
119+
z[i] = Word(zi)
120+
c = Word(cc)
121+
}
122+
return // c is the carry out of the last word processed (still y if the loop never ran)
123+
}
124+
101125
func subVW_g(z, x []Word, y Word) (c Word) {
102126
c = y
103127
// The comment near the top of this file discusses this for loop condition.
@@ -109,6 +133,22 @@ func subVW_g(z, x []Word, y Word) (c Word) {
109133
return
110134
}
111135

136+
// subVWlarge is to subVW as addVWlarge is to addVW:
// it checks on every iteration whether the borrow has been used up,
// and if so, copies the remaining words of x into z.
137+
func subVWlarge(z, x []Word, y Word) (c Word) {
138+
c = y // y is fed in as the initial borrow from word 0
139+
// The comment near the top of this file discusses this for loop condition.
140+
for i := 0; i < len(z) && i < len(x); i++ {
141+
if c == 0 { // no borrow left: the remaining words of z are just those of x
142+
copy(z[i:], x[i:])
143+
return // c is 0 on this path
144+
}
145+
zi, cc := bits.Sub(uint(x[i]), uint(c), 0) // zi = x[i] - c, cc = borrow out
146+
z[i] = Word(zi)
147+
c = Word(cc)
148+
}
149+
return // c is the borrow out of the last word processed (still y if the loop never ran)
150+
}
151+
112152
func shlVU_g(z, x []Word, s uint) (c Word) {
113153
if s == 0 {
114154
copy(z, x)

src/math/big/arith_decl_pure.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,21 @@ func subVV(z, x, y []Word) (c Word) {
2323
}
2424

2525
func addVW(z, x []Word, y Word) (c Word) {
26-
return addVW_g(z, x, y)
26+
// TODO: remove indirect function call when golang.org/issue/30548 is fixed
27+
fn := addVW_g
28+
if len(z) > 32 {
29+
fn = addVWlarge
30+
}
31+
return fn(z, x, y)
2732
}
2833

2934
func subVW(z, x []Word, y Word) (c Word) {
30-
return subVW_g(z, x, y)
35+
// TODO: remove indirect function call when golang.org/issue/30548 is fixed
36+
fn := subVW_g
37+
if len(z) > 32 {
38+
fn = subVWlarge
39+
}
40+
return fn(z, x, y)
3141
}
3242

3343
func shlVU(z, x []Word, s uint) (c Word) {

0 commit comments

Comments
 (0)