Skip to content

Commit 0184b44

Browse files
remyoudomphengodeke-em
authored andcommitted
strconv: implement Ryū-like algorithm for fixed precision ftoa
This patch implements a simplified version of Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369) for formatting floating-point numbers with a fixed number of decimal digits. It uses the same principles but does not need to handle the complex task of finding a shortest representation. This allows to handle a few more cases than Grisu3, notably formatting with up to 18 significant digits. name old time/op new time/op delta AppendFloat/32Fixed8Hard-4 72.0ns ± 2% 56.0ns ± 2% -22.28% (p=0.000 n=10+10) AppendFloat/32Fixed9Hard-4 74.8ns ± 0% 64.2ns ± 2% -14.16% (p=0.000 n=8+10) AppendFloat/64Fixed1-4 60.4ns ± 1% 54.2ns ± 1% -10.31% (p=0.000 n=10+9) AppendFloat/64Fixed2-4 66.3ns ± 1% 53.3ns ± 1% -19.54% (p=0.000 n=10+9) AppendFloat/64Fixed3-4 61.0ns ± 1% 55.0ns ± 2% -9.80% (p=0.000 n=9+10) AppendFloat/64Fixed4-4 66.9ns ± 0% 52.0ns ± 2% -22.20% (p=0.000 n=8+10) AppendFloat/64Fixed12-4 95.5ns ± 1% 76.2ns ± 3% -20.19% (p=0.000 n=10+9) AppendFloat/64Fixed16-4 1.62µs ± 0% 0.07µs ± 2% -95.69% (p=0.000 n=10+10) AppendFloat/64Fixed12Hard-4 1.27µs ± 1% 0.07µs ± 1% -94.83% (p=0.000 n=9+9) AppendFloat/64Fixed17Hard-4 3.68µs ± 1% 0.08µs ± 2% -97.86% (p=0.000 n=10+9) AppendFloat/64Fixed18Hard-4 3.67µs ± 0% 3.72µs ± 1% +1.44% (p=0.000 n=9+10) Updates #15672 Change-Id: I160963e141dd48287ad8cf57bcc3c686277788e8 Reviewed-on: https://go-review.googlesource.com/c/go/+/170079 Reviewed-by: Emmanuel Odeke <[email protected]> Trust: Emmanuel Odeke <[email protected]> Trust: Nigel Tao <[email protected]> Trust: Robert Griesemer <[email protected]> Run-TryBot: Emmanuel Odeke <[email protected]> TryBot-Result: Go Bot <[email protected]>
1 parent 8f4c506 commit 0184b44

File tree

5 files changed

+374
-5
lines changed

5 files changed

+374
-5
lines changed

src/strconv/ftoa.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,15 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
143143
}
144144
digits = prec
145145
}
146-
if digits <= 15 {
147-
// try fast algorithm when the number of digits is reasonable.
148-
var buf [24]byte
146+
var buf [24]byte
147+
if bitSize == 32 && digits <= 9 {
149148
digs.d = buf[:]
150-
f := extFloat{mant, exp - int(flt.mantbits), neg}
151-
ok = f.FixedDecimal(&digs, digits)
149+
ryuFtoaFixed32(&digs, uint32(mant), exp-int(flt.mantbits), digits)
150+
ok = true
151+
} else if digits <= 18 {
152+
digs.d = buf[:]
153+
ryuFtoaFixed64(&digs, mant, exp-int(flt.mantbits), digits)
154+
ok = true
152155
}
153156
}
154157
if !ok {

src/strconv/ftoa_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ var ftoatests = []ftoaTest{
7777
{1.2345e6, 'f', 5, "1234500.00000"},
7878
{1.2345e6, 'g', 5, "1.2345e+06"},
7979

80+
// Round to even
81+
{1.2345e6, 'e', 3, "1.234e+06"},
82+
{1.2355e6, 'e', 3, "1.236e+06"},
83+
{1.2345, 'f', 3, "1.234"},
84+
{1.2355, 'f', 3, "1.236"},
85+
{1234567890123456.5, 'e', 15, "1.234567890123456e+15"},
86+
{1234567890123457.5, 'e', 15, "1.234567890123458e+15"},
87+
8088
{1e23, 'e', 17, "9.99999999999999916e+22"},
8189
{1e23, 'f', 17, "99999999999999991611392.00000000000000000"},
8290
{1e23, 'g', 17, "9.9999999999999992e+22"},
@@ -241,11 +249,19 @@ var ftoaBenches = []struct {
241249
{"32Point", 339.7784, 'g', -1, 32},
242250
{"32Exp", -5.09e25, 'g', -1, 32},
243251
{"32NegExp", -5.11e-25, 'g', -1, 32},
252+
{"32Fixed8Hard", math.Ldexp(15961084, -125), 'e', 8, 32},
253+
{"32Fixed9Hard", math.Ldexp(14855922, -83), 'e', 9, 32},
244254

245255
{"64Fixed1", 123456, 'e', 3, 64},
246256
{"64Fixed2", 123.456, 'e', 3, 64},
247257
{"64Fixed3", 1.23456e+78, 'e', 3, 64},
248258
{"64Fixed4", 1.23456e-78, 'e', 3, 64},
259+
{"64Fixed12", 1.23456e-78, 'e', 12, 64},
260+
{"64Fixed16", 1.23456e-78, 'e', 16, 64},
261+
// From testdata/testfp.txt
262+
{"64Fixed12Hard", math.Ldexp(6965949469487146, -249), 'e', 12, 64},
263+
{"64Fixed17Hard", math.Ldexp(8887055249355788, 665), 'e', 17, 64},
264+
{"64Fixed18Hard", math.Ldexp(6994187472632449, 690), 'e', 18, 64},
249265

250266
// Trigger slow path (see issue #15672).
251267
{"Slowpath64", 622666234635.3213e-320, 'e', -1, 64},

src/strconv/ftoaryu.go

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
// Copyright 2021 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package strconv
6+
7+
import (
8+
"math/bits"
9+
)
10+
11+
// binary to decimal conversion using the Ryū algorithm.
12+
//
13+
// See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
14+
//
15+
// Fixed precision formatting is a variant of the original paper's
16+
// algorithm, where a single multiplication by 10^k is required,
17+
// sharing the same rounding guarantees.
18+
19+
// ryuFtoaFixed32 formats mant*(2^exp) with prec decimal digits.
20+
func ryuFtoaFixed32(d *decimalSlice, mant uint32, exp int, prec int) {
21+
if prec < 0 {
22+
panic("ryuFtoaFixed32 called with negative prec")
23+
}
24+
if prec > 9 {
25+
panic("ryuFtoaFixed32 called with prec > 9")
26+
}
27+
// Zero input.
28+
if mant == 0 {
29+
d.nd, d.dp = 0, 0
30+
return
31+
}
32+
// Renormalize to a 25-bit mantissa.
33+
e2 := exp
34+
if b := bits.Len32(mant); b < 25 {
35+
mant <<= uint(25 - b)
36+
e2 += int(b) - 25
37+
}
38+
// Choose an exponent such that rounded mant*(2^e2)*(10^q) has
39+
// at least prec decimal digits, i.e
40+
// mant*(2^e2)*(10^q) >= 10^(prec-1)
41+
// Because mant >= 2^24, it is enough to choose:
42+
// 2^(e2+24) >= 10^(-q+prec-1)
43+
// or q = -mulByLog2Log10(e2+24) + prec - 1
44+
q := -mulByLog2Log10(e2+24) + prec - 1
45+
46+
// Now compute mant*(2^e2)*(10^q).
47+
// Is it an exact computation?
48+
// Only small positive powers of 10 are exact (5^28 has 66 bits).
49+
exact := q <= 27 && q >= 0
50+
51+
di, dexp2, d0 := mult64bitPow10(mant, e2, q)
52+
if dexp2 >= 0 {
53+
panic("not enough significant bits after mult64bitPow10")
54+
}
55+
// As a special case, computation might still be exact, if exponent
56+
// was negative and if it amounts to computing an exact division.
57+
// In that case, we ignore all lower bits.
58+
// Note that division by 10^11 cannot be exact as 5^11 has 26 bits.
59+
if q < 0 && q >= -10 && divisibleByPower5(uint64(mant), -q) {
60+
exact = true
61+
d0 = true
62+
}
63+
// Remove extra lower bits and keep rounding info.
64+
extra := uint(-dexp2)
65+
extraMask := uint32(1<<extra - 1)
66+
67+
di, dfrac := di>>extra, di&extraMask
68+
roundUp := false
69+
if exact {
70+
// If we computed an exact product, d + 1/2
71+
// should round to d+1 if 'd' is odd.
72+
roundUp = dfrac > 1<<(extra-1) ||
73+
(dfrac == 1<<(extra-1) && !d0) ||
74+
(dfrac == 1<<(extra-1) && d0 && di&1 == 1)
75+
} else {
76+
// otherwise, d+1/2 always rounds up because
77+
// we truncated below.
78+
roundUp = dfrac>>(extra-1) == 1
79+
}
80+
if dfrac != 0 {
81+
d0 = false
82+
}
83+
// Proceed to the requested number of digits
84+
formatDecimal(d, uint64(di), !d0, roundUp, prec)
85+
// Adjust exponent
86+
d.dp -= q
87+
}
88+
89+
// ryuFtoaFixed64 formats mant*(2^exp) with prec decimal digits.
90+
func ryuFtoaFixed64(d *decimalSlice, mant uint64, exp int, prec int) {
91+
if prec > 18 {
92+
panic("ryuFtoaFixed64 called with prec > 18")
93+
}
94+
// Zero input.
95+
if mant == 0 {
96+
d.nd, d.dp = 0, 0
97+
return
98+
}
99+
// Renormalize to a 55-bit mantissa.
100+
e2 := exp
101+
if b := bits.Len64(mant); b < 55 {
102+
mant = mant << uint(55-b)
103+
e2 += int(b) - 55
104+
}
105+
// Choose an exponent such that rounded mant*(2^e2)*(10^q) has
106+
// at least prec decimal digits, i.e
107+
// mant*(2^e2)*(10^q) >= 10^(prec-1)
108+
// Because mant >= 2^54, it is enough to choose:
109+
// 2^(e2+54) >= 10^(-q+prec-1)
110+
// or q = -mulByLog2Log10(e2+54) + prec - 1
111+
//
112+
// The minimal required exponent is -mulByLog2Log10(1025)+18 = -291
113+
// The maximal required exponent is mulByLog2Log10(1074)+18 = 342
114+
q := -mulByLog2Log10(e2+54) + prec - 1
115+
116+
// Now compute mant*(2^e2)*(10^q).
117+
// Is it an exact computation?
118+
// Only small positive powers of 10 are exact (5^55 has 128 bits).
119+
exact := q <= 55 && q >= 0
120+
121+
di, dexp2, d0 := mult128bitPow10(mant, e2, q)
122+
if dexp2 >= 0 {
123+
panic("not enough significant bits after mult128bitPow10")
124+
}
125+
// As a special case, computation might still be exact, if exponent
126+
// was negative and if it amounts to computing an exact division.
127+
// In that case, we ignore all lower bits.
128+
// Note that division by 10^23 cannot be exact as 5^23 has 54 bits.
129+
if q < 0 && q >= -22 && divisibleByPower5(mant, -q) {
130+
exact = true
131+
d0 = true
132+
}
133+
// Remove extra lower bits and keep rounding info.
134+
extra := uint(-dexp2)
135+
extraMask := uint64(1<<extra - 1)
136+
137+
di, dfrac := di>>extra, di&extraMask
138+
roundUp := false
139+
if exact {
140+
// If we computed an exact product, d + 1/2
141+
// should round to d+1 if 'd' is odd.
142+
roundUp = dfrac > 1<<(extra-1) ||
143+
(dfrac == 1<<(extra-1) && !d0) ||
144+
(dfrac == 1<<(extra-1) && d0 && di&1 == 1)
145+
} else {
146+
// otherwise, d+1/2 always rounds up because
147+
// we truncated below.
148+
roundUp = dfrac>>(extra-1) == 1
149+
}
150+
if dfrac != 0 {
151+
d0 = false
152+
}
153+
// Proceed to the requested number of digits
154+
formatDecimal(d, di, !d0, roundUp, prec)
155+
// Adjust exponent
156+
d.dp -= q
157+
}
158+
159+
// formatDecimal fills d with at most prec decimal digits
160+
// of mantissa m. The boolean trunc indicates whether m
161+
// is truncated compared to the original number being formatted.
162+
func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int) {
163+
max := uint64pow10[prec]
164+
trimmed := 0
165+
for m >= max {
166+
a, b := m/10, m%10
167+
m = a
168+
trimmed++
169+
if b > 5 {
170+
roundUp = true
171+
} else if b < 5 {
172+
roundUp = false
173+
} else { // b == 5
174+
// round up if there are trailing digits,
175+
// or if the new value of m is odd (round-to-even convention)
176+
roundUp = trunc || m&1 == 1
177+
}
178+
if b != 0 {
179+
trunc = true
180+
}
181+
}
182+
if roundUp {
183+
m++
184+
}
185+
if m >= max {
186+
// Happens if di was originally 99999....xx
187+
m /= 10
188+
trimmed++
189+
}
190+
// render digits (similar to formatBits)
191+
n := uint(prec)
192+
d.nd = int(prec)
193+
v := m
194+
for v >= 100 {
195+
var v1, v2 uint64
196+
if v>>32 == 0 {
197+
v1, v2 = uint64(uint32(v)/100), uint64(uint32(v)%100)
198+
} else {
199+
v1, v2 = v/100, v%100
200+
}
201+
n -= 2
202+
d.d[n+1] = smallsString[2*v2+1]
203+
d.d[n+0] = smallsString[2*v2+0]
204+
v = v1
205+
}
206+
if v > 0 {
207+
n--
208+
d.d[n] = smallsString[2*v+1]
209+
}
210+
if v >= 10 {
211+
n--
212+
d.d[n] = smallsString[2*v]
213+
}
214+
for d.d[d.nd-1] == '0' {
215+
d.nd--
216+
trimmed++
217+
}
218+
d.dp = d.nd + trimmed
219+
}
220+
221+
// mulByLog2Log10 returns math.Floor(x * log(2)/log(10)) for an integer x in
222+
// the range -1600 <= x && x <= +1600.
223+
//
224+
// The range restriction lets us work in faster integer arithmetic instead of
225+
// slower floating point arithmetic. Correctness is verified by unit tests.
226+
func mulByLog2Log10(x int) int {
227+
// log(2)/log(10) ≈ 0.30102999566 ≈ 78913 / 2^18
228+
return (x * 78913) >> 18
229+
}
230+
231+
// mulByLog10Log2 returns math.Floor(x * log(10)/log(2)) for an integer x in
232+
// the range -500 <= x && x <= +500.
233+
//
234+
// The range restriction lets us work in faster integer arithmetic instead of
235+
// slower floating point arithmetic. Correctness is verified by unit tests.
236+
func mulByLog10Log2(x int) int {
237+
// log(10)/log(2) ≈ 3.32192809489 ≈ 108853 / 2^15
238+
return (x * 108853) >> 15
239+
}
240+
241+
// mult64bitPow10 takes a floating-point input with a 25-bit
242+
// mantissa and multiplies it with 10^q. The resulting mantissa
243+
// is m*P >> 57 where P is a 64-bit element of the detailedPowersOfTen tables.
244+
// It is typically 31 or 32-bit wide.
245+
// The returned boolean is true if all trimmed bits were zero.
246+
//
247+
// That is:
248+
// m*2^e2 * round(10^q) = resM * 2^resE + ε
249+
// exact = ε == 0
250+
func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
251+
if q == 0 {
252+
return m << 7, e2 - 7, true
253+
}
254+
if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
255+
// This never happens due to the range of float32/float64 exponent
256+
panic("mult64bitPow10: power of 10 is out of range")
257+
}
258+
pow := detailedPowersOfTen[q-detailedPowersOfTenMinExp10][1]
259+
if q < 0 {
260+
// Inverse powers of ten must be rounded up.
261+
pow += 1
262+
}
263+
hi, lo := bits.Mul64(uint64(m), pow)
264+
e2 += mulByLog10Log2(q) - 63 + 57
265+
return uint32(hi<<7 | lo>>57), e2, lo<<7 == 0
266+
}
267+
268+
// mult128bitPow10 takes a floating-point input with a 55-bit
269+
// mantissa and multiplies it with 10^q. The resulting mantissa
270+
// is m*P >> 119 where P is a 128-bit element of the detailedPowersOfTen tables.
271+
// It is typically 63 or 64-bit wide.
272+
// The returned boolean is true is all trimmed bits were zero.
273+
//
274+
// That is:
275+
// m*2^e2 * round(10^q) = resM * 2^resE + ε
276+
// exact = ε == 0
277+
func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) {
278+
if q == 0 {
279+
return m << 9, e2 - 9, true
280+
}
281+
if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
282+
// This never happens due to the range of float32/float64 exponent
283+
panic("mult128bitPow10: power of 10 is out of range")
284+
}
285+
pow := detailedPowersOfTen[q-detailedPowersOfTenMinExp10]
286+
if q < 0 {
287+
// Inverse powers of ten must be rounded up.
288+
pow[0] += 1
289+
}
290+
e2 += mulByLog10Log2(q) - 127 + 119
291+
292+
// long multiplication
293+
l1, l0 := bits.Mul64(m, pow[0])
294+
h1, h0 := bits.Mul64(m, pow[1])
295+
mid, carry := bits.Add64(l1, h0, 0)
296+
h1 += carry
297+
return h1<<9 | mid>>55, e2, mid<<9 == 0 && l0 == 0
298+
}
299+
300+
func divisibleByPower5(m uint64, k int) bool {
301+
if m == 0 {
302+
return true
303+
}
304+
for i := 0; i < k; i++ {
305+
if m%5 != 0 {
306+
return false
307+
}
308+
m /= 5
309+
}
310+
return true
311+
}

src/strconv/ftoaryu_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright 2021 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package strconv_test
6+
7+
import (
8+
"math"
9+
. "strconv"
10+
"testing"
11+
)
12+
13+
func TestMulByLog2Log10(t *testing.T) {
14+
for x := -1600; x <= +1600; x++ {
15+
iMath := MulByLog2Log10(x)
16+
fMath := int(math.Floor(float64(x) * math.Ln2 / math.Ln10))
17+
if iMath != fMath {
18+
t.Errorf("mulByLog2Log10(%d) failed: %d vs %d\n", x, iMath, fMath)
19+
}
20+
}
21+
}
22+
23+
func TestMulByLog10Log2(t *testing.T) {
24+
for x := -500; x <= +500; x++ {
25+
iMath := MulByLog10Log2(x)
26+
fMath := int(math.Floor(float64(x) * math.Ln10 / math.Ln2))
27+
if iMath != fMath {
28+
t.Errorf("mulByLog10Log2(%d) failed: %d vs %d\n", x, iMath, fMath)
29+
}
30+
}
31+
}

0 commit comments

Comments
 (0)