Skip to content

Commit 2da8ca4

Browse files
erifan01 authored and ianlancetaylor committed
[release-branch.go1.12] math/big: fix the bug in assembly implementation of shlVU on arm64
For the case where the addresses of parameter z and x of the function shlVU overlap and the address of z is greater than x, x (input value) can be polluted during the calculation when the high words of x are overlapped with the low words of z (output value).

Updates #31084
Fixes #32940

Change-Id: I9bb0266a1d7856b8faa9a9b1975d6f57dece0479
Reviewed-on: https://go-review.googlesource.com/c/go/+/169780
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
(cherry picked from commit 503e6cc)
Reviewed-on: https://go-review.googlesource.com/c/go/+/185041
Run-TryBot: Dmitri Shuralyov <[email protected]>
1 parent 047a326 commit 2da8ca4

File tree

2 files changed

+128
-43
lines changed

2 files changed

+128
-43
lines changed

src/math/big/arith_arm64.s

Lines changed: 59 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -194,83 +194,97 @@ len0:
194194
MOVD R2, c+56(FP)
195195
RET
196196

197-
198197
// func shlVU(z, x []Word, s uint) (c Word)
198+
// This implementation processes the words from the most significant down to the least
199+
// significant, so it produces a wrong result when the low words of x overlap the high
200+
// words of z: those words of x are overwritten before they are read. Callers that
201+
// invoke this function directly must make sure the operands do not overlap in that way.
199202
TEXT ·shlVU(SB),NOSPLIT,$0
200-
MOVD z+0(FP), R0
201-
MOVD z_len+8(FP), R1
203+
LDP z+0(FP), (R0, R1) // R0 = z.ptr, R1 = len(z)
202204
MOVD x+24(FP), R2
203205
MOVD s+48(FP), R3
204-
MOVD $0, R8 // in order not to affect the first element, R8 is initialized to zero
205-
MOVD $64, R4
206-
SUB R3, R4
206+
ADD R1<<3, R0 // R0 = &z[n]
207+
ADD R1<<3, R2 // R2 = &x[n]
207208
CBZ R1, len0
208209
CBZ R3, copy // if the number of shift is 0, just copy x to z
209-
210-
TBZ $0, R1, two
211-
MOVD.P 8(R2), R6
212-
LSR R4, R6, R8
213-
LSL R3, R6
214-
MOVD.P R6, 8(R0)
210+
MOVD $64, R4
211+
SUB R3, R4
212+
// handling the most significant element x[n-1]
213+
MOVD.W -8(R2), R6
214+
LSR R4, R6, R5 // return value
215+
LSL R3, R6, R8 // x[i] << s
216+
SUB $1, R1
217+
one: TBZ $0, R1, two
218+
MOVD.W -8(R2), R6
219+
LSR R4, R6, R7
220+
ORR R8, R7
221+
LSL R3, R6, R8
215222
SUB $1, R1
223+
MOVD.W R7, -8(R0)
216224
two:
217225
TBZ $1, R1, loop
218-
LDP.P 16(R2), (R6, R7)
219-
LSR R4, R6, R9
220-
LSL R3, R6
221-
ORR R8, R6
222-
LSR R4, R7, R8
226+
LDP.W -16(R2), (R6, R7)
227+
LSR R4, R7, R10
228+
ORR R8, R10
223229
LSL R3, R7
224-
ORR R9, R7
225-
STP.P (R6, R7), 16(R0)
230+
LSR R4, R6, R9
231+
ORR R7, R9
232+
LSL R3, R6, R8
226233
SUB $2, R1
234+
STP.W (R9, R10), -16(R0)
227235
loop:
228236
CBZ R1, done
229-
LDP.P 32(R2), (R10, R11)
230-
LDP -16(R2), (R12, R13)
231-
LSR R4, R10, R20
232-
LSL R3, R10
233-
ORR R8, R10 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s))
234-
LSR R4, R11, R21
235-
LSL R3, R11
236-
ORR R20, R11
237+
LDP.W -32(R2), (R10, R11)
238+
LDP 16(R2), (R12, R13)
239+
LSR R4, R13, R23
240+
ORR R8, R23 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s))
241+
LSL R3, R13
237242
LSR R4, R12, R22
243+
ORR R13, R22
238244
LSL R3, R12
239-
ORR R21, R12
240-
LSR R4, R13, R8
241-
LSL R3, R13
242-
ORR R22, R13
243-
STP.P (R10, R11), 32(R0)
244-
STP (R12, R13), -16(R0)
245+
LSR R4, R11, R21
246+
ORR R12, R21
247+
LSL R3, R11
248+
LSR R4, R10, R20
249+
ORR R11, R20
250+
LSL R3, R10, R8
251+
STP.W (R20, R21), -32(R0)
252+
STP (R22, R23), 16(R0)
245253
SUB $4, R1
246254
B loop
247255
done:
248-
MOVD R8, c+56(FP) // the part moved out from the last element
256+
MOVD.W R8, -8(R0) // the first element x[0]
257+
MOVD R5, c+56(FP) // the part moved out from x[n-1]
249258
RET
250259
copy:
260+
CMP R0, R2
261+
BEQ len0
251262
TBZ $0, R1, ctwo
252-
MOVD.P 8(R2), R3
253-
MOVD.P R3, 8(R0)
263+
MOVD.W -8(R2), R4
264+
MOVD.W R4, -8(R0)
254265
SUB $1, R1
255266
ctwo:
256267
TBZ $1, R1, cloop
257-
LDP.P 16(R2), (R4, R5)
258-
STP.P (R4, R5), 16(R0)
268+
LDP.W -16(R2), (R4, R5)
269+
STP.W (R4, R5), -16(R0)
259270
SUB $2, R1
260271
cloop:
261272
CBZ R1, len0
262-
LDP.P 32(R2), (R4, R5)
263-
LDP -16(R2), (R6, R7)
264-
STP.P (R4, R5), 32(R0)
265-
STP (R6, R7), -16(R0)
273+
LDP.W -32(R2), (R4, R5)
274+
LDP 16(R2), (R6, R7)
275+
STP.W (R4, R5), -32(R0)
276+
STP (R6, R7), 16(R0)
266277
SUB $4, R1
267278
B cloop
268279
len0:
269280
MOVD $0, c+56(FP)
270281
RET
271282

272-
273283
// func shrVU(z, x []Word, s uint) (c Word)
284+
// This implementation processes the words from the least significant up to the most
285+
// significant, so it produces a wrong result when the high words of x overlap the low
286+
// words of z: those words of x are overwritten before they are read. Callers that
287+
// invoke this function directly must make sure the operands do not overlap in that way.
274288
TEXT ·shrVU(SB),NOSPLIT,$0
275289
MOVD z+0(FP), R0
276290
MOVD z_len+8(FP), R1
@@ -330,6 +344,8 @@ done:
330344
MOVD R8, (R0) // deal with the last element
331345
RET
332346
copy:
347+
CMP R0, R2
348+
BEQ len0
333349
TBZ $0, R1, ctwo
334350
MOVD.P 8(R2), R3
335351
MOVD.P R3, 8(R0)

src/math/big/arith_test.go

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,6 +255,75 @@ func TestFunVW(t *testing.T) {
255255
}
256256
}
257257

258+
type argVU struct {
259+
d []Word // d is a Word slice, the input parameters x and z come from this array.
260+
l uint // l is the length of the input parameters x and z.
261+
xp uint // xp is the starting position of the input parameter x, x := d[xp:xp+l].
262+
zp uint // zp is the starting position of the input parameter z, z := d[zp:zp+l].
263+
s uint // s is the shift number.
264+
r []Word // r is the expected output result z.
265+
c Word // c is the expected return value.
266+
m string // message.
267+
}
268+
269+
var argshlVU = []argVU{
270+
// test cases for shlVU
271+
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of shlVU"},
272+
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of shlVU"},
273+
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of shlVU"},
274+
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of shlVU"},
275+
}
276+
277+
var argshrVU = []argVU{
278+
// test cases for shrVU
279+
{[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of shrVU"},
280+
{[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of shrVU"},
281+
{[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of shrVU"},
282+
{[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of shrVU"},
283+
}
284+
285+
func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) {
286+
// save a.d for error message, or it will be overwritten.
287+
b := make([]Word, len(a.d))
288+
copy(b, a.d)
289+
z := a.d[a.zp : a.zp+a.l]
290+
x := a.d[a.xp : a.xp+a.l]
291+
c := f(z, x, a.s)
292+
for i, zi := range z {
293+
if zi != a.r[i] {
294+
t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i])
295+
break
296+
}
297+
}
298+
if c != a.c {
299+
t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c)
300+
}
301+
}
302+
303+
func TestShiftOverlap(t *testing.T) {
304+
for _, a := range argshlVU {
305+
arg := a
306+
testShiftFunc(t, shlVU, arg)
307+
}
308+
309+
for _, a := range argshrVU {
310+
arg := a
311+
testShiftFunc(t, shrVU, arg)
312+
}
313+
}
314+
315+
func TestIssue31084(t *testing.T) {
316+
// compute 10^n via 5^n << n.
317+
const n = 165
318+
p := nat(nil).expNN(nat{5}, nat{n}, nil)
319+
p = p.shl(p, uint(n))
320+
got := string(p.utoa(10))
321+
want := "1" + strings.Repeat("0", n)
322+
if got != want {
323+
t.Errorf("shl(%v, %v)\n\tgot %s; want %s\n", p, uint(n), got, want)
324+
}
325+
}
326+
258327
func BenchmarkAddVW(b *testing.B) {
259328
for _, n := range benchSizes {
260329
if isRaceBuilder && n > 1e3 {

0 commit comments

Comments
 (0)