From: erifan01 Date: Thu, 28 Mar 2019 10:53:42 +0000 (+0000) Subject: math/big: fix the bug in assembly implementation of shlVU on arm64 X-Git-Tag: go1.13beta1~386 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=503e6ccd740c48f21c1d159d904b51da2d9a8ca9;p=gostls13.git math/big: fix the bug in assembly implementation of shlVU on arm64 For the case where the addresses of parameter z and x of the function shlVU overlap and the address of z is greater than x, x (input value) can be polluted during the calculation when the high words of x are overlapped with the low words of z (output value). Fixes #31084 Change-Id: I9bb0266a1d7856b8faa9a9b1975d6f57dece0479 Reviewed-on: https://go-review.googlesource.com/c/go/+/169780 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- diff --git a/src/math/big/arith_arm64.s b/src/math/big/arith_arm64.s index 114d5f67f2..18e513e2c3 100644 --- a/src/math/big/arith_arm64.s +++ b/src/math/big/arith_arm64.s @@ -194,87 +194,97 @@ len0: MOVD R2, c+56(FP) RET - // func shlVU(z, x []Word, s uint) (c Word) +// This implementation handles the shift operation from the high word to the low word, +// which may be an error for the case where the low word of x overlaps with the high +// word of z. When calling this function directly, you need to pay attention to this +// situation. TEXT ·shlVU(SB),NOSPLIT,$0 - // Disable assembly for now - it is subtly incorrect. - // See #31084 for a test that fails using this code. - B ·shlVU_g(SB) - - MOVD z+0(FP), R0 - MOVD z_len+8(FP), R1 + LDP z+0(FP), (R0, R1) // R0 = z.ptr, R1 = len(z) MOVD x+24(FP), R2 MOVD s+48(FP), R3 - MOVD $0, R8 // in order not to affect the first element, R8 is initialized to zero - MOVD $64, R4 - SUB R3, R4 + ADD R1<<3, R0 // R0 = &z[n] + ADD R1<<3, R2 // R2 = &x[n] CBZ R1, len0 CBZ R3, copy // if the number of shift is 0, just copy x to z - - TBZ $0, R1, two - MOVD.P 8(R2), R6 - LSR R4, R6, R8 - LSL R3, R6 - MOVD.P R6, 8(R0) + MOVD $64, R4 + SUB R3, R4 + // handling the most significant element x[n-1] + MOVD.W -8(R2), R6 + LSR R4, R6, R5 // return value + LSL R3, R6, R8 // x[i] << s SUB $1, R1 +one: TBZ $0, R1, two + MOVD.W -8(R2), R6 + LSR R4, R6, R7 + ORR R8, R7 + LSL R3, R6, R8 + SUB $1, R1 + MOVD.W R7, -8(R0) two: TBZ $1, R1, loop - LDP.P 16(R2), (R6, R7) - LSR R4, R6, R9 - LSL R3, R6 - ORR R8, R6 - LSR R4, R7, R8 + LDP.W -16(R2), (R6, R7) + LSR R4, R7, R10 + ORR R8, R10 LSL R3, R7 - ORR R9, R7 - STP.P (R6, R7), 16(R0) + LSR R4, R6, R9 + ORR R7, R9 + LSL R3, R6, R8 SUB $2, R1 + STP.W (R9, R10), -16(R0) loop: CBZ R1, done - LDP.P 32(R2), (R10, R11) - LDP -16(R2), (R12, R13) - LSR R4, R10, R20 - LSL R3, R10 - ORR R8, R10 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s)) - LSR R4, R11, R21 - LSL R3, R11 - ORR R20, R11 + LDP.W -32(R2), (R10, R11) + LDP 16(R2), (R12, R13) + LSR R4, R13, R23 + ORR R8, R23 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s)) + LSL R3, R13 LSR R4, R12, R22 + ORR R13, R22 LSL R3, R12 - ORR R21, R12 - LSR R4, R13, R8 - LSL R3, R13 - ORR R22, R13 - STP.P (R10, R11), 32(R0) - STP (R12, R13), -16(R0) + LSR R4, R11, R21 + ORR R12, R21 + LSL R3, R11 + LSR R4, R10, R20 + ORR R11, R20 + LSL R3, R10, R8 + STP.W (R20, R21), -32(R0) + STP (R22, R23), 16(R0) SUB $4, R1 B loop done: - MOVD R8, c+56(FP) // the part moved out from the last element + MOVD.W R8, -8(R0) // the first element x[0] + MOVD R5, c+56(FP) // the part moved out from x[n-1] RET copy: + CMP R0, R2 + BEQ len0 TBZ $0, R1, ctwo - MOVD.P 8(R2), R3 - MOVD.P R3, 8(R0) + MOVD.W -8(R2), R4 + MOVD.W R4, -8(R0) SUB $1, R1 ctwo: TBZ $1, R1, cloop - LDP.P 16(R2), (R4, R5) - STP.P (R4, R5), 16(R0) + LDP.W -16(R2), (R4, R5) + STP.W (R4, R5), -16(R0) SUB $2, R1 cloop: CBZ R1, len0 - LDP.P 32(R2), (R4, R5) - LDP -16(R2), (R6, R7) - STP.P (R4, R5), 32(R0) - STP (R6, R7), -16(R0) + LDP.W -32(R2), (R4, R5) + LDP 16(R2), (R6, R7) + STP.W (R4, R5), -32(R0) + STP (R6, R7), 16(R0) SUB $4, R1 B cloop len0: MOVD $0, c+56(FP) RET - // func shrVU(z, x []Word, s uint) (c Word) +// This implementation handles the shift operation from the low word to the high word, +// which may be an error for the case where the high word of x overlaps with the low +// word of z. When calling this function directly, you need to pay attention to this +// situation. TEXT ·shrVU(SB),NOSPLIT,$0 MOVD z+0(FP), R0 MOVD z_len+8(FP), R1 @@ -334,6 +344,8 @@ done: MOVD R8, (R0) // deal with the last element RET copy: + CMP R0, R2 + BEQ len0 TBZ $0, R1, ctwo MOVD.P 8(R2), R3 MOVD.P R3, 8(R0) diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go index d28f680688..05136f1895 100644 --- a/src/math/big/arith_test.go +++ b/src/math/big/arith_test.go @@ -213,6 +213,75 @@ func TestFunVW(t *testing.T) { } } +type argVU struct { + d []Word // d is a Word slice, the input parameters x and z come from this array. + l uint // l is the length of the input parameters x and z. + xp uint // xp is the starting position of the input parameter x, x := d[xp:xp+l]. + zp uint // zp is the starting position of the input parameter z, z := d[zp:zp+l]. + s uint // s is the shift number. + r []Word // r is the expected output result z. + c Word // c is the expected return value. + m string // message. +} + +var argshlVU = []argVU{ + // test cases for shlVU + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of shlVU"}, + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of shlVU"}, + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of shlVU"}, + {[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of shlVU"}, +} + +var argshrVU = []argVU{ + // test cases for shrVU + {[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of shrVU"}, + {[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of shrVU"}, + {[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of shrVU"}, + {[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of shrVU"}, +} + +func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) { + // save a.d for error message, or it will be overwritten. + b := make([]Word, len(a.d)) + copy(b, a.d) + z := a.d[a.zp : a.zp+a.l] + x := a.d[a.xp : a.xp+a.l] + c := f(z, x, a.s) + for i, zi := range z { + if zi != a.r[i] { + t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i]) + break + } + } + if c != a.c { + t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c) + } +} + +func TestShiftOverlap(t *testing.T) { + for _, a := range argshlVU { + arg := a + testShiftFunc(t, shlVU, arg) + } + + for _, a := range argshrVU { + arg := a + testShiftFunc(t, shrVU, arg) + } +} + +func TestIssue31084(t *testing.T) { + // compute 10^n via 5^n << n. + const n = 165 + p := nat(nil).expNN(nat{5}, nat{n}, nil) + p = p.shl(p, uint(n)) + got := string(p.utoa(10)) + want := "1" + strings.Repeat("0", n) + if got != want { + t.Errorf("shl(%v, %v)\n\tgot %s; want %s\n", p, uint(n), got, want) + } +} + func BenchmarkAddVW(b *testing.B) { for _, n := range benchSizes { if isRaceBuilder && n > 1e3 {