math/big: replace division with multiplication by reciprocal word

author SparrowLii <liyuancylx@gmail.com>

Tue, 25 Aug 2020 08:33:50 +0000 (16:33 +0800)

committer Giovanni Bajo <rasky@develer.com>

Wed, 23 Sep 2020 21:55:55 +0000 (21:55 +0000)
author SparrowLii <liyuancylx@gmail.com>
Tue, 25 Aug 2020 08:33:50 +0000 (16:33 +0800)
committer Giovanni Bajo <rasky@develer.com>
Wed, 23 Sep 2020 21:55:55 +0000 (21:55 +0000)
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go

index d0b3e8df94e164f068291c26691d61daf93a37a1..815ff7f99fb358c5aad6603d3f42cd900ac4757b 100644 (file)
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -4022,11 +4022,6 @@ func init() {
                         return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
                 },
                 sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64LE, sys.ArchPPC64, sys.ArchS390X)
-       add("math/big", "divWW",
-               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-                       return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
-               },
-               sys.ArchAMD64)
  }
  
  // findIntrinsic returns a function which builds the SSA equivalent of the
diff --git a/src/math/big/arith.go b/src/math/big/arith.go

index b0885f261fe9bad8e35290b94200a98a2048505b..750ce8aa398df4ba6d32244ea9e211c592e38821 100644 (file)
--- a/src/math/big/arith.go
+++ b/src/math/big/arith.go
@@ -60,12 +60,6 @@ func nlz(x Word) uint {
         return uint(bits.LeadingZeros(uint(x)))
  }
  
-// q = (u1<<_W + u0 - r)/v
-func divWW_g(u1, u0, v Word) (q, r Word) {
-       qq, rr := bits.Div(uint(u1), uint(u0), uint(v))
-       return Word(qq), Word(rr)
-}
-
  // The resulting carry c is either 0 or 1.
  func addVV_g(z, x, y []Word) (c Word) {
         // The comment near the top of this file discusses this for loop condition.
@@ -207,10 +201,87 @@ func addMulVVW_g(z, x []Word, y Word) (c Word) {
         return
  }
  
-func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) {
+// q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y.
+// An approximate reciprocal with a reference to "Improved Division by Invariant Integers
+// (IEEE Transactions on Computers, 11 Jun. 2010)"
+func divWW(x1, x0, y, m Word) (q, r Word) {
+       s := nlz(y)
+       if s != 0 {
+               x1 = x1<<s | x0>>(_W-s)
+               x0 <<= s
+               y <<= s
+       }
+       d := uint(y)
+       // We know that
+       //   m = ⎣(B^2-1)/d⎦-B
+       //   ⎣(B^2-1)/d⎦ = m+B
+       //   (B^2-1)/d = m+B+delta1    0 <= delta1 <= (d-1)/d
+       //   B^2/d = m+B+delta2        0 <= delta2 <= 1
+       // The quotient we're trying to compute is
+       //   quotient = ⎣(x1*B+x0)/d⎦
+       //            = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
+       //            = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
+       //            = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
+       // The latter two terms of this three-term sum are between 0 and 1.
+       // So we can compute just the first term, and we will be low by at most 2.
+       t1, t0 := bits.Mul(uint(m), uint(x1))
+       _, c := bits.Add(t0, uint(x0), 0)
+       t1, _ = bits.Add(t1, uint(x1), c)
+       // The quotient is either t1, t1+1, or t1+2.
+       // We'll try t1 and adjust if needed.
+       qq := t1
+       // compute remainder r=x-d*q.
+       dq1, dq0 := bits.Mul(d, qq)
+       r0, b := bits.Sub(uint(x0), dq0, 0)
+       r1, _ := bits.Sub(uint(x1), dq1, b)
+       // The remainder we just computed is bounded above by B+d:
+       // r = x1*B + x0 - d*q.
+       //   = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
+       //   = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha)                                   0 <= alpha < 1
+       //   = x1*B + x0 - x1*d/B*m                         - x1*d - x0*d/B + d*alpha
+       //   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
+       //   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
+       //   = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta)        - x1*d - x0*d/B + d*alpha   0 <= beta < 1
+       //   = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
+       //   =        x0        + x1/B        + x1*d/B*beta        - x0*d/B + d*alpha
+       //   = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
+       //   <  B*(1-d/B) +  d*B/B          + d          because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
+       //   =  B - d     +  d              + d
+       //   = B+d
+       // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
+       // Add 1 to q and subtract d from r. That guarantees that r is <B, so
+       // we no longer need to keep track of r1.
+       if r1 != 0 {
+               qq++
+               r0 -= d
+       }
+       // If the remainder is still too large, increment q one more time.
+       if r0 >= d {
+               qq++
+               r0 -= d
+       }
+       return Word(qq), Word(r0 >> s)
+}
+
+func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) {
         r = xn
+       if len(x) == 1 {
+               qq, rr := bits.Div(uint(r), uint(x[0]), uint(y))
+               z[0] = Word(qq)
+               return Word(rr)
+       }
+       rec := reciprocalWord(y)
         for i := len(z) - 1; i >= 0; i-- {
-               z[i], r = divWW_g(r, x[i], y)
+               z[i], r = divWW(r, x[i], y, rec)
         }
-       return
+       return r
+}
+
+// reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
+func reciprocalWord(d1 Word) Word {
+       u := uint(d1 << nlz(d1))
+       x1 := ^u
+       x0 := uint(_M)
+       rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
+       return Word(rec)
  }
diff --git a/src/math/big/arith_386.s b/src/math/big/arith_386.s

index f61da2aba7251a46087b394fc2f9e9502f25b057..d0ea949fe6689c950d4fd1a12d875ee8ebdbf91a 100644 (file)
--- a/src/math/big/arith_386.s
+++ b/src/math/big/arith_386.s
@@ -18,16 +18,6 @@ TEXT ·mulWW(SB),NOSPLIT,$0
         RET
  
  
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB),NOSPLIT,$0
-       MOVL x1+0(FP), DX
-       MOVL x0+4(FP), AX
-       DIVL y+8(FP)
-       MOVL AX, q+12(FP)
-       MOVL DX, r+16(FP)
-       RET
-
-
  // func addVV(z, x, y []Word) (c Word)
  TEXT ·addVV(SB),NOSPLIT,$0
         MOVL z+0(FP), DI
@@ -251,21 +241,4 @@ E6:        CMPL BX, $0             // i < 0
         RET
  
  
-// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
-TEXT ·divWVW(SB),NOSPLIT,$0
-       MOVL z+0(FP), DI
-       MOVL xn+12(FP), DX      // r = xn
-       MOVL x+16(FP), SI
-       MOVL y+28(FP), CX
-       MOVL z_len+4(FP), BX    // i = z
-       JMP E7
  
-L7:    MOVL (SI)(BX*4), AX
-       DIVL CX
-       MOVL AX, (DI)(BX*4)
-
-E7:    SUBL $1, BX             // i--
-       JGE L7                  // i >= 0
-
-       MOVL DX, r+32(FP)
-       RET
diff --git a/src/math/big/arith_amd64.s b/src/math/big/arith_amd64.s

index b75639f5406c260ca0f0e76d7f00ff3249aa739b..61043ca2d97c491c8bd9591ae2f2fcdb45a7f0f1 100644 (file)
--- a/src/math/big/arith_amd64.s
+++ b/src/math/big/arith_amd64.s
@@ -18,14 +18,6 @@ TEXT ·mulWW(SB),NOSPLIT,$0
         RET
  
  
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB),NOSPLIT,$0
-       MOVQ x1+0(FP), DX
-       MOVQ x0+8(FP), AX
-       DIVQ y+16(FP)
-       MOVQ AX, q+24(FP)
-       MOVQ DX, r+32(FP)
-       RET
  
  // The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0.
  // It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared.
@@ -531,21 +523,3 @@ adx_short:
  
  
  
-// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
-TEXT ·divWVW(SB),NOSPLIT,$0
-       MOVQ z+0(FP), R10
-       MOVQ xn+24(FP), DX      // r = xn
-       MOVQ x+32(FP), R8
-       MOVQ y+56(FP), R9
-       MOVQ z_len+8(FP), BX    // i = z
-       JMP E7
-
-L7:    MOVQ (R8)(BX*8), AX
-       DIVQ R9
-       MOVQ AX, (R10)(BX*8)
-
-E7:    SUBQ $1, BX             // i--
-       JGE L7                  // i >= 0
-
-       MOVQ DX, r+64(FP)
-       RET
diff --git a/src/math/big/arith_arm.s b/src/math/big/arith_arm.s

index 33aa36f7090fb80f8322e9f027300fb3383aedf7..cbf7445e7abded24bca3fbbd2aa27ce896707852 100644 (file)
--- a/src/math/big/arith_arm.s
+++ b/src/math/big/arith_arm.s
@@ -272,17 +272,6 @@ E9:
         RET
  
  
-// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
-TEXT ·divWVW(SB),NOSPLIT,$0
-       // ARM has no multiword division, so use portable code.
-       B ·divWVW_g(SB)
-
-
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB),NOSPLIT,$0
-       // ARM has no multiword division, so use portable code.
-       B ·divWW_g(SB)
-
  
  // func mulWW(x, y Word) (z1, z0 Word)
  TEXT ·mulWW(SB),NOSPLIT,$0
diff --git a/src/math/big/arith_arm64.s b/src/math/big/arith_arm64.s

index da6e408e19f726c76d3a27eb804849fcb2034947..22357d088e4c9b9d179dcf22fb638be51350997b 100644 (file)
--- a/src/math/big/arith_arm64.s
+++ b/src/math/big/arith_arm64.s
@@ -23,11 +23,6 @@ TEXT ·mulWW(SB),NOSPLIT,$0
         RET
  
  
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB),NOSPLIT,$0
-       B       ·divWW_g(SB) // ARM64 has no multiword division
-
-
  // func addVV(z, x, y []Word) (c Word)
  TEXT ·addVV(SB),NOSPLIT,$0
         MOVD    z_len+8(FP), R0
@@ -585,6 +580,4 @@ done:
         MOVD    R4, c+56(FP)
         RET
  
-// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
-TEXT ·divWVW(SB),NOSPLIT,$0
-       B ·divWVW_g(SB)
+
diff --git a/src/math/big/arith_decl.go b/src/math/big/arith_decl.go

index 41e592334c376ecc532bd5bf870c7fe89f3c12ab..d519bdc87b636328d568f5a34d1d77b396c296fe 100644 (file)
--- a/src/math/big/arith_decl.go
+++ b/src/math/big/arith_decl.go
@@ -8,7 +8,6 @@ package big
  
  // implemented in arith_$GOARCH.s
  func mulWW(x, y Word) (z1, z0 Word)
-func divWW(x1, x0, y Word) (q, r Word)
  func addVV(z, x, y []Word) (c Word)
  func subVV(z, x, y []Word) (c Word)
  func addVW(z, x []Word, y Word) (c Word)
@@ -17,4 +16,3 @@ func shlVU(z, x []Word, s uint) (c Word)
  func shrVU(z, x []Word, s uint) (c Word)
  func mulAddVWW(z, x []Word, y, r Word) (c Word)
  func addMulVVW(z, x []Word, y Word) (c Word)
-func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
diff --git a/src/math/big/arith_decl_pure.go b/src/math/big/arith_decl_pure.go

index 305f7ee03b42d8078fc43dc33a1a7b8f0c58f8b2..5faa3bd281999eab511cb7384b2d0f7efc80b34b 100644 (file)
--- a/src/math/big/arith_decl_pure.go
+++ b/src/math/big/arith_decl_pure.go
@@ -10,10 +10,6 @@ func mulWW(x, y Word) (z1, z0 Word) {
         return mulWW_g(x, y)
  }
  
-func divWW(x1, x0, y Word) (q, r Word) {
-       return divWW_g(x1, x0, y)
-}
-
  func addVV(z, x, y []Word) (c Word) {
         return addVV_g(z, x, y)
  }
@@ -55,7 +51,3 @@ func mulAddVWW(z, x []Word, y, r Word) (c Word) {
  func addMulVVW(z, x []Word, y Word) (c Word) {
         return addMulVVW_g(z, x, y)
  }
-
-func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) {
-       return divWVW_g(z, xn, x, y)
-}
diff --git a/src/math/big/arith_mips64x.s b/src/math/big/arith_mips64x.s

index 983510ee3d42d1e9f8d7b44334fc650b1f22afdd..804b9fe06edc26e9cdac916a06a0d9e631fc6186 100644 (file)
--- a/src/math/big/arith_mips64x.s
+++ b/src/math/big/arith_mips64x.s
@@ -12,9 +12,6 @@
  TEXT ·mulWW(SB),NOSPLIT,$0
         JMP ·mulWW_g(SB)
  
-TEXT ·divWW(SB),NOSPLIT,$0
-       JMP ·divWW_g(SB)
-
  TEXT ·addVV(SB),NOSPLIT,$0
         JMP ·addVV_g(SB)
  
@@ -39,5 +36,3 @@ TEXT ·mulAddVWW(SB),NOSPLIT,$0
  TEXT ·addMulVVW(SB),NOSPLIT,$0
         JMP ·addMulVVW_g(SB)
  
-TEXT ·divWVW(SB),NOSPLIT,$0
-       JMP ·divWVW_g(SB)
diff --git a/src/math/big/arith_mipsx.s b/src/math/big/arith_mipsx.s

index 54cafbd9c0c80cc6108bd2dde5596ab1ff9e359e..efdecb80f3291edf7a75464ed9109924747c4aae 100644 (file)
--- a/src/math/big/arith_mipsx.s
+++ b/src/math/big/arith_mipsx.s
@@ -12,9 +12,6 @@
  TEXT ·mulWW(SB),NOSPLIT,$0
         JMP     ·mulWW_g(SB)
  
-TEXT ·divWW(SB),NOSPLIT,$0
-       JMP     ·divWW_g(SB)
-
  TEXT ·addVV(SB),NOSPLIT,$0
         JMP     ·addVV_g(SB)
  
@@ -39,5 +36,3 @@ TEXT ·mulAddVWW(SB),NOSPLIT,$0
  TEXT ·addMulVVW(SB),NOSPLIT,$0
         JMP     ·addMulVVW_g(SB)
  
-TEXT ·divWVW(SB),NOSPLIT,$0
-       JMP     ·divWVW_g(SB)
diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s

index 409e10ab48bd4741c168998be6839d5f2fef2e75..b299ccc2fb824cfbbd326f9e2ea79292d4ddad1b 100644 (file)
--- a/src/math/big/arith_ppc64x.s
+++ b/src/math/big/arith_ppc64x.s
@@ -478,44 +478,4 @@ done:
         MOVD R4, c+56(FP)
         RET
  
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB), NOSPLIT, $0
-       MOVD x1+0(FP), R4
-       MOVD x0+8(FP), R5
-       MOVD y+16(FP), R6
-
-       CMPU R4, R6
-       BGE  divbigger
-
-       // from the programmer's note in ch. 3 of the ISA manual, p.74
-       DIVDEU R6, R4, R3
-       DIVDU  R6, R5, R7
-       MULLD  R6, R3, R8
-       MULLD  R6, R7, R20
-       SUB    R20, R5, R10
-       ADD    R7, R3, R3
-       SUB    R8, R10, R4
-       CMPU   R4, R10
-       BLT    adjust
-       CMPU   R4, R6
-       BLT    end
-
-adjust:
-       MOVD $1, R21
-       ADD  R21, R3, R3
-       SUB  R6, R4, R4
-
-end:
-       MOVD R3, q+24(FP)
-       MOVD R4, r+32(FP)
  
-       RET
-
-divbigger:
-       MOVD $-1, R7
-       MOVD R7, q+24(FP)
-       MOVD R7, r+32(FP)
-       RET
-
-TEXT ·divWVW(SB), NOSPLIT, $0
-       BR ·divWVW_g(SB)
diff --git a/src/math/big/arith_riscv64.s b/src/math/big/arith_riscv64.s

index 59065c3f7bac37cf6011b15dfd2838d739f450f5..a2f7666c7b9acfe023d3d628f2fb53b0b9130ce3 100644 (file)
--- a/src/math/big/arith_riscv64.s
+++ b/src/math/big/arith_riscv64.s
@@ -19,9 +19,6 @@ TEXT ·mulWW(SB),NOSPLIT,$0
         MOV     X8, z0+24(FP)
         RET
  
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB),NOSPLIT,$0
-       JMP ·divWW_g(SB)               // riscv64 has no multiword division
  
  TEXT ·addVV(SB),NOSPLIT,$0
         JMP ·addVV_g(SB)
@@ -47,5 +44,3 @@ TEXT ·mulAddVWW(SB),NOSPLIT,$0
  TEXT ·addMulVVW(SB),NOSPLIT,$0
         JMP ·addMulVVW_g(SB)
  
-TEXT ·divWVW(SB),NOSPLIT,$0
-       JMP ·divWVW_g(SB)
diff --git a/src/math/big/arith_s390x.s b/src/math/big/arith_s390x.s

index 48917681112a981b9685dba693e62a943c26e843..242aca7434a1345f24a5b7bd240029b8134bfbaf 100644 (file)
--- a/src/math/big/arith_s390x.s
+++ b/src/math/big/arith_s390x.s
@@ -17,15 +17,6 @@ TEXT ·mulWW(SB), NOSPLIT, $0
         MOVD   R11, z0+24(FP)
         RET
  
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB), NOSPLIT, $0
-       MOVD x1+0(FP), R10
-       MOVD x0+8(FP), R11
-       MOVD y+16(FP), R5
-       WORD $0xb98700a5   // dlgr r10,r5
-       MOVD R11, q+24(FP)
-       MOVD R10, r+32(FP)
-       RET
  
  // DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
  // func addVV(z, x, y []Word) (c Word)
@@ -990,27 +981,3 @@ E6:
         MOVD R4, c+56(FP)
         RET
  
-// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
-// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1(*8) , (R0 set to 0) + use R11 + use R7 for i
-TEXT ·divWVW(SB), NOSPLIT, $0
-       MOVD z+0(FP), R2
-       MOVD xn+24(FP), R10  // r = xn
-       MOVD x+32(FP), R8
-       MOVD y+56(FP), R9
-       MOVD z_len+8(FP), R7 // i = z
-       SLD  $3, R7, R1      // i*8
-       MOVD $0, R0          // make sure it's zero
-       BR   E7
-
-L7:
-       MOVD (R8)(R1*1), R11
-       WORD $0xB98700A9     // DLGR R10,R9
-       MOVD R11, (R2)(R1*1)
-
-E7:
-       SUB $1, R7 // i--
-       SUB $8, R1
-       BGE L7     // i >= 0
-
-       MOVD R10, r+64(FP)
-       RET
diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go

index fc205934c5ca92f568c60a1f2a347e3746efaa33..808d17845927e72bbd0ad7e8fd15bb7cbf94a17d 100644 (file)
--- a/src/math/big/arith_test.go
+++ b/src/math/big/arith_test.go
@@ -7,6 +7,7 @@ package big
  import (
         "fmt"
         "internal/testenv"
+       "math/bits"
         "math/rand"
         "strings"
         "testing"
@@ -493,7 +494,6 @@ func TestFunVWW(t *testing.T) {
  
                 if a.y != 0 && a.r < a.y {
                         arg := argWVW{a.x, a.c, a.z, a.y, a.r}
-                       testFunWVW(t, "divWVW_g", divWVW_g, arg)
                         testFunWVW(t, "divWVW", divWVW, arg)
                 }
         }
@@ -536,6 +536,42 @@ func TestMulAddWWW(t *testing.T) {
         }
  }
  
+var divWWTests = []struct {
+       x1, x0, y Word
+       q, r      Word
+}{
+       {_M >> 1, 0, _M, _M >> 1, _M >> 1},
+       {_M - (1 << (_W - 2)), _M, 3 << (_W - 2), _M, _M - (1 << (_W - 2))},
+}
+
+const testsNumber = 1 << 16
+
+func TestDivWW(t *testing.T) {
+       i := 0
+       for i, test := range divWWTests {
+               rec := reciprocalWord(test.y)
+               q, r := divWW(test.x1, test.x0, test.y, rec)
+               if q != test.q || r != test.r {
+                       t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, test.q, test.r)
+               }
+       }
+       //random tests
+       for ; i < testsNumber; i++ {
+               x1 := rndW()
+               x0 := rndW()
+               y := rndW()
+               if x1 >= y {
+                       continue
+               }
+               rec := reciprocalWord(y)
+               qGot, rGot := divWW(x1, x0, y, rec)
+               qWant, rWant := bits.Div(uint(x1), uint(x0), uint(y))
+               if uint(qGot) != qWant || uint(rGot) != rWant {
+                       t.Errorf("#%d got (%x, %x) want (%x, %x)", i, qGot, rGot, qWant, rWant)
+               }
+       }
+}
+
  func BenchmarkMulAddVWW(b *testing.B) {
         for _, n := range benchSizes {
                 if isRaceBuilder && n > 1e3 {
@@ -570,3 +606,19 @@ func BenchmarkAddMulVVW(b *testing.B) {
                 })
         }
  }
+func BenchmarkDivWVW(b *testing.B) {
+       for _, n := range benchSizes {
+               if isRaceBuilder && n > 1e3 {
+                       continue
+               }
+               x := rndV(n)
+               y := rndW()
+               z := make([]Word, n)
+               b.Run(fmt.Sprint(n), func(b *testing.B) {
+                       b.SetBytes(int64(n * _W))
+                       for i := 0; i < b.N; i++ {
+                               divWVW(z, 0, x, y)
+                       }
+               })
+       }
+}
diff --git a/src/math/big/arith_wasm.s b/src/math/big/arith_wasm.s

index 382597c694535996a215277758121ff90a88ef50..add106446909d6fc9fa0f9c24fee8efc4cde343f 100644 (file)
--- a/src/math/big/arith_wasm.s
+++ b/src/math/big/arith_wasm.s
@@ -9,9 +9,6 @@
  TEXT ·mulWW(SB),NOSPLIT,$0
         JMP ·mulWW_g(SB)
  
-TEXT ·divWW(SB),NOSPLIT,$0
-       JMP ·divWW_g(SB)
-
  TEXT ·addVV(SB),NOSPLIT,$0
         JMP ·addVV_g(SB)
  
@@ -36,5 +33,3 @@ TEXT ·mulAddVWW(SB),NOSPLIT,$0
  TEXT ·addMulVVW(SB),NOSPLIT,$0
         JMP ·addMulVVW_g(SB)
  
-TEXT ·divWVW(SB),NOSPLIT,$0
-       JMP ·divWVW_g(SB)
diff --git a/src/math/big/nat.go b/src/math/big/nat.go

index 6a3989bf9d82bf1c9ec3b57aedead466002bfb8e..c2f3787848ccab922d81e56666e24832faa4af48 100644 (file)
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -751,6 +751,7 @@ func (q nat) divBasic(u, v nat) {
  
         // D2.
         vn1 := v[n-1]
+       rec := reciprocalWord(vn1)
         for j := m; j >= 0; j-- {
                 // D3.
                 qhat := Word(_M)
@@ -760,7 +761,7 @@ func (q nat) divBasic(u, v nat) {
                 }
                 if ujn != vn1 {
                         var rhat Word
-                       qhat, rhat = divWW(ujn, u[j+n-1], vn1)
+                       qhat, rhat = divWW(ujn, u[j+n-1], vn1, rec)
  
                         // x1 | x2 = q̂v_{n-2}
                         vn2 := v[n-2]
author	SparrowLii <liyuancylx@gmail.com>
	Tue, 25 Aug 2020 08:33:50 +0000 (16:33 +0800)
committer	Giovanni Bajo <rasky@develer.com>
	Wed, 23 Sep 2020 21:55:55 +0000 (21:55 +0000)
src/cmd/compile/internal/gc/ssa.go		patch \| blob \| history
src/math/big/arith.go		patch \| blob \| history
src/math/big/arith_386.s		patch \| blob \| history
src/math/big/arith_amd64.s		patch \| blob \| history
src/math/big/arith_arm.s		patch \| blob \| history
src/math/big/arith_arm64.s		patch \| blob \| history
src/math/big/arith_decl.go		patch \| blob \| history
src/math/big/arith_decl_pure.go		patch \| blob \| history
src/math/big/arith_mips64x.s		patch \| blob \| history
src/math/big/arith_mipsx.s		patch \| blob \| history
src/math/big/arith_ppc64x.s		patch \| blob \| history
src/math/big/arith_riscv64.s		patch \| blob \| history
src/math/big/arith_s390x.s		patch \| blob \| history
src/math/big/arith_test.go		patch \| blob \| history
src/math/big/arith_wasm.s		patch \| blob \| history
src/math/big/nat.go		patch \| blob \| history