From b2794a1c2ed8c74563cf28d9e4a9b3f1db43ef1f Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 16 Sep 2013 14:04:45 -0400 Subject: [PATCH] runtime: make ARM integer div-by-zero traceback-friendly The implementation of division in the 5 toolchain is a bit too magical. Hide the magic from the traceback routines. Also add a test for the results of the software divide routine. Fixes #5805. R=golang-dev, minux.ma CC=golang-dev https://golang.org/cl/13239052 --- src/pkg/runtime/vlop_arm.s | 45 ++-- test/divmod.go | 460 +++++++++++++++++++++++++++++++++++++ 2 files changed, 487 insertions(+), 18 deletions(-) create mode 100644 test/divmod.go diff --git a/src/pkg/runtime/vlop_arm.s b/src/pkg/runtime/vlop_arm.s index 2437cb5d2b..d7c566afb8 100644 --- a/src/pkg/runtime/vlop_arm.s +++ b/src/pkg/runtime/vlop_arm.s @@ -104,16 +104,13 @@ r = 1 // input n, output r s = 2 // three temporary variables M = 3 a = 11 -// Please be careful when changing this, it is pretty fragile: -// 1, don't use unconditional branch as the linker is free to reorder the blocks; -// 2. if a == 11, beware that the linker will use R11 if you use certain instructions. +// Be careful: R(a) == R11 will be used by the linker for synthesized instructions. TEXT udiv<>(SB),NOSPLIT,$-4 CLZ R(q), R(s) // find normalizing shift MOVW.S R(q)<>25, PC, R(a) // most significant 7 bits of divisor - MOVBU.NE (4*36-64)(R(a)), R(a) // 36 == number of inst. 
between fast_udiv_tab and begin + MOVW $fast_udiv_tab<>-64(SB), R(M) + MOVBU.NE R(a)>>25(R(M)), R(a) // index by most significant 7 bits of divisor -begin: SUB.S $7, R(s) RSB $0, R(q), R(M) // M = -q MOVW.PL R(a)<=2^(31-6)=2^25 @@ -160,20 +155,34 @@ udiv_by_large_d: CMN R(r), R(M) ADD.CS R(M), R(r) ADD.CS $1, R(q) - - // return, can't use RET here or fast_udiv_tab will be dropped during linking - MOVW R14, R15 + RET udiv_by_0_or_1: // carry set if d==1, carry clear if d==0 - MOVW.CS R(r), R(q) - MOVW.CS $0, R(r) - BL.CC runtime·panicdivide(SB) // no way back + BCC udiv_by_0 + MOVW R(r), R(q) + MOVW $0, R(r) + RET - // return, can't use RET here or fast_udiv_tab will be dropped during linking - MOVW R14, R15 +udiv_by_0: + // The ARM toolchain expects it can emit references to DIV and MOD + // instructions. The linker rewrites each pseudo-instruction into + // a sequence that pushes two values onto the stack and then calls + // _divu, _modu, _div, or _mod (below), all of which have a 16-byte + // frame plus the saved LR. The traceback routine knows the expanded + // stack frame size at the pseudo-instruction call site, but it + // doesn't know that the frame has a non-standard layout. In particular, + // it expects to find a saved LR in the bottom word of the frame. + // Unwind the stack back to the pseudo-instruction call site, copy the + // saved LR where the traceback routine will look for it, and make it + // appear that panicdivide was called from that PC. 
+ MOVW 0(R13), LR + ADD $20, R13 + MOVW 8(R13), R1 // actual saved LR + MOVW R1, 0(R13) // expected here for traceback + B runtime·panicdivide(SB) -fast_udiv_tab: +TEXT fast_udiv_tab<>(SB),NOSPLIT,$-4 // var tab [64]byte // tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) } // laid out here as little-endian uint32s diff --git a/test/divmod.go b/test/divmod.go new file mode 100644 index 0000000000..49fed0222c --- /dev/null +++ b/test/divmod.go @@ -0,0 +1,460 @@ +// run + +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test division of variables. Generate many test cases, +// compute correct answer using shift and subtract, +// and then compare against results from division and +// modulus operators. +// +// Primarily useful for testing software div/mod. + +package main + +const long = false + +func main() { + if long { + // About 3e9 test cases (calls to checkdiv3). + // Too long for everyday testing. + gen2(3, 64, 2, 64, checkdiv1) + println(ntest) + } else { + // About 4e6 test cases (calls to checkdiv3). + // Runs for 8 seconds on ARM chromebook, much faster elsewhere. + gen2(2, 64, 1, 64, checkdiv1) + } +} + +// generate all uint64 values x where x has at most n bits set in the low w +// and call f(x) for each. +func gen1(n, w int, f func(uint64)) { + gen(0, 0, n, w-1, f) +} + +func gen(val uint64, nbits, maxbits, pos int, f func(uint64)) { + if pos < 0 { + f(val) + return + } + gen(val, nbits, maxbits, pos-1, f) + if nbits < maxbits { + gen(val|1< y && y+y <= x { + sh++ + y <<= 1 + } + for ; sh >= 0; sh-- { + q <<= 1 + if x >= y { + x -= y + q |= 1 + } + y >>= 1 + } + return q, x +} + +// signed divide and mod: do unsigned and adjust signs. 
+func idiv(x, y int64) (q, r int64) { + // special case for minint / -1 = minint + if x-1 > x && y == -1 { + return x, 0 + } + ux := uint64(x) + uy := uint64(y) + if x < 0 { + ux = -ux + } + if y < 0 { + uy = -uy + } + uq, ur := udiv(ux, uy) + q = int64(uq) + r = int64(ur) + if x < 0 { + r = -r + } + if (x < 0) != (y < 0) { + q = -q + } + return q, r +} -- 2.50.0