math/big: replace addVW/subVW assembly with fast pure Go

author Russ Cox <rsc@golang.org>

Mon, 7 Apr 2025 21:13:20 +0000 (17:13 -0400)

committer Russ Cox <rsc@golang.org>

Fri, 18 Apr 2025 22:07:59 +0000 (15:07 -0700)
author Russ Cox <rsc@golang.org>
Mon, 7 Apr 2025 21:13:20 +0000 (17:13 -0400)
committer Russ Cox <rsc@golang.org>
Fri, 18 Apr 2025 22:07:59 +0000 (15:07 -0700)
diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go

index 1dbd68cd67e064c80d9a4c4660c244758ceef1c2..760bb7a999f312a3a629d43edaebcdff1f4bad5d 100644 (file)
--- a/src/cmd/compile/internal/test/inl_test.go
+++ b/src/cmd/compile/internal/test/inl_test.go
@@ -175,9 +175,6 @@ func TestIntendedInlining(t *testing.T) {
                 },
                 "math/big": {
                         "bigEndianWord",
-                       // The following functions require the math_big_pure_go build tag.
-                       "addVW",
-                       "subVW",
                 },
                 "math/rand": {
                         "(*rngSource).Int63",
diff --git a/src/math/big/arith.go b/src/math/big/arith.go

index cd2b8a4228062d4b9d9f874219879ad24b1d9486..e2cd99f602bf5e1d0a2fa8c9b3ff0682cce34fc8 100644 (file)
--- a/src/math/big/arith.go
+++ b/src/math/big/arith.go
@@ -10,7 +10,10 @@
  
  package big
  
-import "math/bits"
+import (
+       "math/bits"
+       _ "unsafe" // for go:linkname
+)
  
  // A Word represents a single digit of a multi-precision unsigned integer.
  type Word uint
@@ -82,33 +85,50 @@ func subVV_g(z, x, y []Word) (c Word) {
         return
  }
  
-// The resulting carry c is either 0 or 1.
-func addVW_g(z, x []Word, y Word) (c Word) {
-       c = y
-       // The comment near the top of this file discusses this for loop condition.
-       for i := 0; i < len(z) && i < len(x); i++ {
-               zi, cc := bits.Add(uint(x[i]), uint(c), 0)
-               z[i] = Word(zi)
-               c = Word(cc)
+// addVW sets z = x + y, returning the final carry c.
+// The behavior is undefined if len(x) != len(z).
+// If len(z) == 0, c = y; otherwise, c is 0 or 1.
+//
+// addVW should be an internal detail,
+// but widely used packages access it using linkname.
+// Notable members of the hall of shame include:
+//   - github.com/remyoudompheng/bigfft
+//
+// Do not remove or change the type signature.
+// See go.dev/issue/67401.
+//
+//go:linkname addVW
+func addVW(z, x []Word, y Word) (c Word) {
+       x = x[:len(z)]
+       if len(z) == 0 {
+               return y
         }
-       return
+       zi, cc := bits.Add(uint(x[0]), uint(y), 0)
+       z[0] = Word(zi)
+       if cc == 0 {
+               if &z[0] != &x[0] {
+                       copy(z[1:], x[1:])
+               }
+               return 0
+       }
+       for i := 1; i < len(z); i++ {
+               xi := x[i]
+               if xi != ^Word(0) {
+                       z[i] = xi + 1
+                       if &z[0] != &x[0] {
+                               copy(z[i+1:], x[i+1:])
+                       }
+                       return 0
+               }
+               z[i] = 0
+       }
+       return 1
  }
  
-// addVWlarge is addVW, but intended for large z.
-// The only difference is that we check on every iteration
-// whether we are done with carries,
-// and if so, switch to a much faster copy instead.
-// This is only a good idea for large z,
-// because the overhead of the check and the function call
-// outweigh the benefits when z is small.
-func addVWlarge(z, x []Word, y Word) (c Word) {
+// addVW_ref is the reference implementation for addVW, used only for testing.
+func addVW_ref(z, x []Word, y Word) (c Word) {
         c = y
-       // The comment near the top of this file discusses this for loop condition.
-       for i := 0; i < len(z) && i < len(x); i++ {
-               if c == 0 {
-                       copy(z[i:], x[i:])
-                       return
-               }
+       for i := range z {
                 zi, cc := bits.Add(uint(x[i]), uint(c), 0)
                 z[i] = Word(zi)
                 c = Word(cc)
@@ -116,31 +136,55 @@ func addVWlarge(z, x []Word, y Word) (c Word) {
         return
  }
  
-func subVW_g(z, x []Word, y Word) (c Word) {
-       c = y
-       // The comment near the top of this file discusses this for loop condition.
-       for i := 0; i < len(z) && i < len(x); i++ {
-               zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
-               z[i] = Word(zi)
-               c = Word(cc)
+// subVW sets z = x - y, returning the final carry c.
+// The behavior is undefined if len(x) != len(z).
+// If len(z) == 0, c = y; otherwise, c is 0 or 1.
+//
+// subVW should be an internal detail,
+// but widely used packages access it using linkname.
+// Notable members of the hall of shame include:
+//   - github.com/remyoudompheng/bigfft
+//
+// Do not remove or change the type signature.
+// See go.dev/issue/67401.
+//
+//go:linkname subVW
+func subVW(z, x []Word, y Word) (c Word) {
+       x = x[:len(z)]
+       if len(z) == 0 {
+               return y
         }
-       return
+       zi, cc := bits.Sub(uint(x[0]), uint(y), 0)
+       z[0] = Word(zi)
+       if cc == 0 {
+               if &z[0] != &x[0] {
+                       copy(z[1:], x[1:])
+               }
+               return 0
+       }
+       for i := 1; i < len(z); i++ {
+               xi := x[i]
+               if xi != 0 {
+                       z[i] = xi - 1
+                       if &z[0] != &x[0] {
+                               copy(z[i+1:], x[i+1:])
+                       }
+                       return 0
+               }
+               z[i] = ^Word(0)
+       }
+       return 1
  }
  
-// subVWlarge is to subVW as addVWlarge is to addVW.
-func subVWlarge(z, x []Word, y Word) (c Word) {
+// subVW_ref is the reference implementation for subVW, used only for testing.
+func subVW_ref(z, x []Word, y Word) (c Word) {
         c = y
-       // The comment near the top of this file discusses this for loop condition.
-       for i := 0; i < len(z) && i < len(x); i++ {
-               if c == 0 {
-                       copy(z[i:], x[i:])
-                       return
-               }
+       for i := range z {
                 zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
                 z[i] = Word(zi)
                 c = Word(cc)
         }
-       return
+       return c
  }
  
  func lshVU_g(z, x []Word, s uint) (c Word) {
diff --git a/src/math/big/arith_386.s b/src/math/big/arith_386.s

index c3567c632dc03a228fd9f00b3a3d3fb7bec92da3..a989503c1cb91b9695562b2bd8e385e6eb7012f6 100644 (file)
--- a/src/math/big/arith_386.s
+++ b/src/math/big/arith_386.s
@@ -60,51 +60,6 @@ E2:  CMPL BX, BP             // i < n
         RET
  
  
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-       MOVL z+0(FP), DI
-       MOVL x+12(FP), SI
-       MOVL y+24(FP), AX       // c = y
-       MOVL z_len+4(FP), BP
-       MOVL $0, BX             // i = 0
-       JMP E3
-
-L3:    ADDL (SI)(BX*4), AX
-       MOVL AX, (DI)(BX*4)
-       SBBL AX, AX             // save CF
-       NEGL AX
-       ADDL $1, BX             // i++
-
-E3:    CMPL BX, BP             // i < n
-       JL L3
-
-       MOVL AX, c+28(FP)
-       RET
-
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-       MOVL z+0(FP), DI
-       MOVL x+12(FP), SI
-       MOVL y+24(FP), AX       // c = y
-       MOVL z_len+4(FP), BP
-       MOVL $0, BX             // i = 0
-       JMP E4
-
-L4:    MOVL (SI)(BX*4), DX
-       SUBL AX, DX
-       MOVL DX, (DI)(BX*4)
-       SBBL AX, AX             // save CF
-       NEGL AX
-       ADDL $1, BX             // i++
-
-E4:    CMPL BX, BP             // i < n
-       JL L4
-
-       MOVL AX, c+28(FP)
-       RET
-
-
  // func lshVU(z, x []Word, s uint) (c Word)
  TEXT ·lshVU(SB),NOSPLIT,$0
         MOVL z_len+4(FP), BX    // i = z
diff --git a/src/math/big/arith_amd64.s b/src/math/big/arith_amd64.s

index 2e1d68f935b3b667ded79f1322ab4edee832b880..66bc6d41ceda429ad7e14846cc2ec290c2d5bdd4 100644 (file)
--- a/src/math/big/arith_amd64.s
+++ b/src/math/big/arith_amd64.s
@@ -121,119 +121,6 @@ E2:       NEGQ CX
         MOVQ CX, c+72(FP)       // return c
         RET
  
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-       MOVQ z_len+8(FP), DI
-       CMPQ DI, $32
-       JG large
-       MOVQ x+24(FP), R8
-       MOVQ y+48(FP), CX       // c = y
-       MOVQ z+0(FP), R10
-
-       MOVQ $0, SI             // i = 0
-
-       // s/JL/JMP/ below to disable the unrolled loop
-       SUBQ $4, DI             // n -= 4
-       JL V3                   // if n < 4 goto V3
-
-U3:    // n >= 0
-       // regular loop body unrolled 4x
-       MOVQ 0(R8)(SI*8), R11
-       MOVQ 8(R8)(SI*8), R12
-       MOVQ 16(R8)(SI*8), R13
-       MOVQ 24(R8)(SI*8), R14
-       ADDQ CX, R11
-       ADCQ $0, R12
-       ADCQ $0, R13
-       ADCQ $0, R14
-       SBBQ CX, CX             // save CF
-       NEGQ CX
-       MOVQ R11, 0(R10)(SI*8)
-       MOVQ R12, 8(R10)(SI*8)
-       MOVQ R13, 16(R10)(SI*8)
-       MOVQ R14, 24(R10)(SI*8)
-
-       ADDQ $4, SI             // i += 4
-       SUBQ $4, DI             // n -= 4
-       JGE U3                  // if n >= 0 goto U3
-
-V3:    ADDQ $4, DI             // n += 4
-       JLE E3                  // if n <= 0 goto E3
-
-L3:    // n > 0
-       ADDQ 0(R8)(SI*8), CX
-       MOVQ CX, 0(R10)(SI*8)
-       SBBQ CX, CX             // save CF
-       NEGQ CX
-
-       ADDQ $1, SI             // i++
-       SUBQ $1, DI             // n--
-       JG L3                   // if n > 0 goto L3
-
-E3:    MOVQ CX, c+56(FP)       // return c
-       RET
-large:
-       JMP ·addVWlarge(SB)
-
-
-// func subVW(z, x []Word, y Word) (c Word)
-// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
-TEXT ·subVW(SB),NOSPLIT,$0
-       MOVQ z_len+8(FP), DI
-       CMPQ DI, $32
-       JG large
-       MOVQ x+24(FP), R8
-       MOVQ y+48(FP), CX       // c = y
-       MOVQ z+0(FP), R10
-
-       MOVQ $0, SI             // i = 0
-
-       // s/JL/JMP/ below to disable the unrolled loop
-       SUBQ $4, DI             // n -= 4
-       JL V4                   // if n < 4 goto V4
-
-U4:    // n >= 0
-       // regular loop body unrolled 4x
-       MOVQ 0(R8)(SI*8), R11
-       MOVQ 8(R8)(SI*8), R12
-       MOVQ 16(R8)(SI*8), R13
-       MOVQ 24(R8)(SI*8), R14
-       SUBQ CX, R11
-       SBBQ $0, R12
-       SBBQ $0, R13
-       SBBQ $0, R14
-       SBBQ CX, CX             // save CF
-       NEGQ CX
-       MOVQ R11, 0(R10)(SI*8)
-       MOVQ R12, 8(R10)(SI*8)
-       MOVQ R13, 16(R10)(SI*8)
-       MOVQ R14, 24(R10)(SI*8)
-
-       ADDQ $4, SI             // i += 4
-       SUBQ $4, DI             // n -= 4
-       JGE U4                  // if n >= 0 goto U4
-
-V4:    ADDQ $4, DI             // n += 4
-       JLE E4                  // if n <= 0 goto E4
-
-L4:    // n > 0
-       MOVQ 0(R8)(SI*8), R11
-       SUBQ CX, R11
-       MOVQ R11, 0(R10)(SI*8)
-       SBBQ CX, CX             // save CF
-       NEGQ CX
-
-       ADDQ $1, SI             // i++
-       SUBQ $1, DI             // n--
-       JG L4                   // if n > 0 goto L4
-
-E4:    MOVQ CX, c+56(FP)       // return c
-       RET
-large:
-       JMP ·subVWlarge(SB)
-
-
  // func lshVU(z, x []Word, s uint) (c Word)
  TEXT ·lshVU(SB),NOSPLIT,$0
         MOVQ z_len+8(FP), BX    // i = z
diff --git a/src/math/big/arith_arm.s b/src/math/big/arith_arm.s

index 5b04e07bd02905aa52591a1709cd2e77c56baf94..ce9fe5f6fb832aa146cf51a4b716c3daa653f36a 100644 (file)
--- a/src/math/big/arith_arm.s
+++ b/src/math/big/arith_arm.s
@@ -58,66 +58,6 @@ E2:
         RET
  
  
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-       MOVW    z+0(FP), R1
-       MOVW    z_len+4(FP), R4
-       MOVW    x+12(FP), R2
-       MOVW    y+24(FP), R3
-       ADD     R4<<2, R1, R4
-       TEQ     R1, R4
-       BNE L3a
-       MOVW    R3, c+28(FP)
-       RET
-L3a:
-       MOVW.P  4(R2), R5
-       ADD.S   R3, R5
-       MOVW.P  R5, 4(R1)
-       B       E3
-L3:
-       MOVW.P  4(R2), R5
-       ADC.S   $0, R5
-       MOVW.P  R5, 4(R1)
-E3:
-       TEQ     R1, R4
-       BNE     L3
-
-       MOVW    $0, R0
-       MOVW.CS $1, R0
-       MOVW    R0, c+28(FP)
-       RET
-
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-       MOVW    z+0(FP), R1
-       MOVW    z_len+4(FP), R4
-       MOVW    x+12(FP), R2
-       MOVW    y+24(FP), R3
-       ADD     R4<<2, R1, R4
-       TEQ     R1, R4
-       BNE L4a
-       MOVW    R3, c+28(FP)
-       RET
-L4a:
-       MOVW.P  4(R2), R5
-       SUB.S   R3, R5
-       MOVW.P  R5, 4(R1)
-       B       E4
-L4:
-       MOVW.P  4(R2), R5
-       SBC.S   $0, R5
-       MOVW.P  R5, 4(R1)
-E4:
-       TEQ     R1, R4
-       BNE     L4
-
-       MOVW    $0, R0
-       MOVW.CC $1, R0
-       MOVW    R0, c+28(FP)
-       RET
-
-
  // func lshVU(z, x []Word, s uint) (c Word)
  TEXT ·lshVU(SB),NOSPLIT,$0
         MOVW    z_len+4(FP), R5
diff --git a/src/math/big/arith_arm64.s b/src/math/big/arith_arm64.s

index e0a8b39e780d1a4eaba344b500976f8d4f76f3f5..aa7dd6755d3bb21fcfafbc50ff029d8f9822af60 100644 (file)
--- a/src/math/big/arith_arm64.s
+++ b/src/math/big/arith_arm64.s
@@ -93,164 +93,6 @@ done:
         MOVD    R0, c+72(FP)
         RET
  
-#define vwOneOp(instr, op1)                            \
-       MOVD.P  8(R1), R4;                              \
-       instr   op1, R4;                                \
-       MOVD.P  R4, 8(R3);
-
-// handle the first 1~4 elements before starting iteration in addVW/subVW
-#define vwPreIter(instr1, instr2, counter, target)     \
-       vwOneOp(instr1, R2);                            \
-       SUB     $1, counter;                            \
-       CBZ     counter, target;                        \
-       vwOneOp(instr2, $0);                            \
-       SUB     $1, counter;                            \
-       CBZ     counter, target;                        \
-       vwOneOp(instr2, $0);                            \
-       SUB     $1, counter;                            \
-       CBZ     counter, target;                        \
-       vwOneOp(instr2, $0);
-
-// do one iteration of add or sub in addVW/subVW
-#define vwOneIter(instr, counter, exit)        \
-       CBZ     counter, exit;          \       // careful not to touch the carry flag
-       LDP.P   32(R1), (R4, R5);       \
-       LDP     -16(R1), (R6, R7);      \
-       instr   $0, R4, R8;             \
-       instr   $0, R5, R9;             \
-       instr   $0, R6, R10;            \
-       instr   $0, R7, R11;            \
-       STP.P   (R8, R9), 32(R3);       \
-       STP     (R10, R11), -16(R3);    \
-       SUB     $4, counter;
-
-// do one iteration of copy in addVW/subVW
-#define vwOneIterCopy(counter, exit)                   \
-       CBZ     counter, exit;                          \
-       LDP.P   32(R1), (R4, R5);                       \
-       LDP     -16(R1), (R6, R7);                      \
-       STP.P   (R4, R5), 32(R3);                       \
-       STP     (R6, R7), -16(R3);                      \
-       SUB     $4, counter;
-
-// func addVW(z, x []Word, y Word) (c Word)
-// The 'large' branch handles large 'z'. It checks the carry flag on every iteration
-// and switches to copy if we are done with carries. The copying is skipped as well
-// if 'x' and 'z' happen to share the same underlying storage.
-// The overhead of the checking and branching is visible when 'z' are small (~5%),
-// so set a threshold of 32, and remain the small-sized part entirely untouched.
-TEXT ·addVW(SB),NOSPLIT,$0
-       MOVD    z+0(FP), R3
-       MOVD    z_len+8(FP), R0
-       MOVD    x+24(FP), R1
-       MOVD    y+48(FP), R2
-       CMP     $32, R0
-       BGE     large           // large-sized 'z' and 'x'
-       CBZ     R0, len0        // the length of z is 0
-       MOVD.P  8(R1), R4
-       ADDS    R2, R4          // z[0] = x[0] + y, set carry
-       MOVD.P  R4, 8(R3)
-       SUB     $1, R0
-       CBZ     R0, len1        // the length of z is 1
-       TBZ     $0, R0, two
-       MOVD.P  8(R1), R4       // do it once
-       ADCS    $0, R4
-       MOVD.P  R4, 8(R3)
-       SUB     $1, R0
-two:                           // do it twice
-       TBZ     $1, R0, loop
-       LDP.P   16(R1), (R4, R5)
-       ADCS    $0, R4, R8      // c, z[i] = x[i] + c
-       ADCS    $0, R5, R9
-       STP.P   (R8, R9), 16(R3)
-       SUB     $2, R0
-loop:                          // do four times per round
-       vwOneIter(ADCS, R0, len1)
-       B       loop
-len1:
-       CSET    HS, R2          // extract carry flag
-len0:
-       MOVD    R2, c+56(FP)
-done:
-       RET
-large:
-       AND     $0x3, R0, R10
-       AND     $~0x3, R0
-       // unrolling for the first 1~4 elements to avoid saving the carry
-       // flag in each step, adjust $R0 if we unrolled 4 elements
-       vwPreIter(ADDS, ADCS, R10, add4)
-       SUB     $4, R0
-add4:
-       BCC     copy
-       vwOneIter(ADCS, R0, len1)
-       B       add4
-copy:
-       MOVD    ZR, c+56(FP)
-       CMP     R1, R3
-       BEQ     done
-copy_4:                                // no carry flag, copy the rest
-       vwOneIterCopy(R0, done)
-       B       copy_4
-
-// func subVW(z, x []Word, y Word) (c Word)
-// The 'large' branch handles large 'z'. It checks the carry flag on every iteration
-// and switches to copy if we are done with carries. The copying is skipped as well
-// if 'x' and 'z' happen to share the same underlying storage.
-// The overhead of the checking and branching is visible when 'z' are small (~5%),
-// so set a threshold of 32, and remain the small-sized part entirely untouched.
-TEXT ·subVW(SB),NOSPLIT,$0
-       MOVD    z+0(FP), R3
-       MOVD    z_len+8(FP), R0
-       MOVD    x+24(FP), R1
-       MOVD    y+48(FP), R2
-       CMP     $32, R0
-       BGE     large           // large-sized 'z' and 'x'
-       CBZ     R0, len0        // the length of z is 0
-       MOVD.P  8(R1), R4
-       SUBS    R2, R4          // z[0] = x[0] - y, set carry
-       MOVD.P  R4, 8(R3)
-       SUB     $1, R0
-       CBZ     R0, len1        // the length of z is 1
-       TBZ     $0, R0, two     // do it once
-       MOVD.P  8(R1), R4
-       SBCS    $0, R4
-       MOVD.P  R4, 8(R3)
-       SUB     $1, R0
-two:                           // do it twice
-       TBZ     $1, R0, loop
-       LDP.P   16(R1), (R4, R5)
-       SBCS    $0, R4, R8      // c, z[i] = x[i] + c
-       SBCS    $0, R5, R9
-       STP.P   (R8, R9), 16(R3)
-       SUB     $2, R0
-loop:                          // do four times per round
-       vwOneIter(SBCS, R0, len1)
-       B       loop
-len1:
-       CSET    LO, R2          // extract carry flag
-len0:
-       MOVD    R2, c+56(FP)
-done:
-       RET
-large:
-       AND     $0x3, R0, R10
-       AND     $~0x3, R0
-       // unrolling for the first 1~4 elements to avoid saving the carry
-       // flag in each step, adjust $R0 if we unrolled 4 elements
-       vwPreIter(SUBS, SBCS, R10, sub4)
-       SUB     $4, R0
-sub4:
-       BCS     copy
-       vwOneIter(SBCS, R0, len1)
-       B       sub4
-copy:
-       MOVD    ZR, c+56(FP)
-       CMP     R1, R3
-       BEQ     done
-copy_4:                                // no carry flag, copy the rest
-       vwOneIterCopy(R0, done)
-       B       copy_4
-
  // func lshVU(z, x []Word, s uint) (c Word)
  // This implementation handles the shift operation from the high word to the low word,
  // which may be an error for the case where the low word of x overlaps with the high
diff --git a/src/math/big/arith_decl.go b/src/math/big/arith_decl.go

index ca73485df0c908593fc6013ef337ede23b64abbf..aa838808b94f04a7464ba5dd7157764b3ca4adb6 100644 (file)
--- a/src/math/big/arith_decl.go
+++ b/src/math/big/arith_decl.go
@@ -34,30 +34,6 @@ func addVV(z, x, y []Word) (c Word)
  //go:noescape
  func subVV(z, x, y []Word) (c Word)
  
-// addVW should be an internal detail,
-// but widely used packages access it using linkname.
-// Notable members of the hall of shame include:
-//   - github.com/remyoudompheng/bigfft
-//
-// Do not remove or change the type signature.
-// See go.dev/issue/67401.
-//
-//go:linkname addVW
-//go:noescape
-func addVW(z, x []Word, y Word) (c Word)
-
-// subVW should be an internal detail,
-// but widely used packages access it using linkname.
-// Notable members of the hall of shame include:
-//   - github.com/remyoudompheng/bigfft
-//
-// Do not remove or change the type signature.
-// See go.dev/issue/67401.
-//
-//go:linkname subVW
-//go:noescape
-func subVW(z, x []Word, y Word) (c Word)
-
  // shlVU should be an internal detail (and a stale one at that),
  // but widely used packages access it using linkname.
  // Notable members of the hall of shame include:
diff --git a/src/math/big/arith_decl_pure.go b/src/math/big/arith_decl_pure.go

index 60672d3e6c6f3b2a5da2597f5dc6bd6bdd52b98e..3b051356fb24b7380de6cc99020282f70d0f1f39 100644 (file)
--- a/src/math/big/arith_decl_pure.go
+++ b/src/math/big/arith_decl_pure.go
@@ -14,24 +14,6 @@ func subVV(z, x, y []Word) (c Word) {
         return subVV_g(z, x, y)
  }
  
-func addVW(z, x []Word, y Word) (c Word) {
-       // TODO: remove indirect function call when golang.org/issue/30548 is fixed
-       fn := addVW_g
-       if len(z) > 32 {
-               fn = addVWlarge
-       }
-       return fn(z, x, y)
-}
-
-func subVW(z, x []Word, y Word) (c Word) {
-       // TODO: remove indirect function call when golang.org/issue/30548 is fixed
-       fn := subVW_g
-       if len(z) > 32 {
-               fn = subVWlarge
-       }
-       return fn(z, x, y)
-}
-
  func lshVU(z, x []Word, s uint) (c Word) {
         return lshVU_g(z, x, s)
  }
diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s

index 3480e0e676e71585baf92ba1b5d5edd29da70d8e..8a5140e57a823a87369c6d54782644c1f5437e45 100644 (file)
--- a/src/math/big/arith_loong64.s
+++ b/src/math/big/arith_loong64.s
@@ -42,56 +42,6 @@ done:
         MOVV    R8, c+72(FP)
         RET
  
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-       // input:
-       //   R4: z
-       //   R5: z_len
-       //   R7: x
-       //   R10: y
-       MOVV    z+0(FP), R4
-       MOVV    z_len+8(FP), R5
-       MOVV    x+24(FP), R7
-       MOVV    y+48(FP), R10
-       MOVV    $0, R6
-       SLLV    $3, R5
-loop:
-       BEQ     R5, R6, done
-       MOVV    (R6)(R7), R8
-       ADDV    R8, R10, R9     // x1 + c = z1, if z1 < x1 then z1 overflow
-       SGTU    R8, R9, R10
-       MOVV    R9, (R6)(R4)
-       ADDV    $8, R6
-       JMP     loop
-done:
-       MOVV    R10, c+56(FP)
-       RET
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-       // input:
-       //   R4: z
-       //   R5: z_len
-       //   R7: x
-       //   R10: y
-       MOVV    z+0(FP), R4
-       MOVV    z_len+8(FP), R5
-       MOVV    x+24(FP), R7
-       MOVV    y+48(FP), R10
-       MOVV    $0, R6
-       SLLV    $3, R5
-loop:
-       BEQ     R5, R6, done
-       MOVV    (R6)(R7), R8
-       SUBV    R10, R8, R11    // x1 - c = z1, if z1 > x1 then overflow
-       SGTU    R11, R8, R10
-       MOVV    R11, (R6)(R4)
-       ADDV    $8, R6
-       JMP     loop
-done:
-       MOVV    R10, c+56(FP)
-       RET
-
  TEXT ·lshVU(SB),NOSPLIT,$0
         JMP ·lshVU_g(SB)
  
diff --git a/src/math/big/arith_mips64x.s b/src/math/big/arith_mips64x.s

index 6c6da48c327d664cc6095d1099370c2b6d83e058..3b32062b067568daf1e25a53e1f24d8d42a8ef6d 100644 (file)
--- a/src/math/big/arith_mips64x.s
+++ b/src/math/big/arith_mips64x.s
@@ -15,12 +15,6 @@ TEXT ·addVV(SB),NOSPLIT,$0
  TEXT ·subVV(SB),NOSPLIT,$0
         JMP ·subVV_g(SB)
  
-TEXT ·addVW(SB),NOSPLIT,$0
-       JMP ·addVW_g(SB)
-
-TEXT ·subVW(SB),NOSPLIT,$0
-       JMP ·subVW_g(SB)
-
  TEXT ·lshVU(SB),NOSPLIT,$0
         JMP ·lshVU_g(SB)
  
diff --git a/src/math/big/arith_mipsx.s b/src/math/big/arith_mipsx.s

index 0e2a0a4b8b83a85c7716ca8cba294fa38d0825e9..edd7456c3eff35ce15f9dd842561a9e2740d9586 100644 (file)
--- a/src/math/big/arith_mipsx.s
+++ b/src/math/big/arith_mipsx.s
@@ -15,12 +15,6 @@ TEXT ·addVV(SB),NOSPLIT,$0
  TEXT ·subVV(SB),NOSPLIT,$0
         JMP     ·subVV_g(SB)
  
-TEXT ·addVW(SB),NOSPLIT,$0
-       JMP     ·addVW_g(SB)
-
-TEXT ·subVW(SB),NOSPLIT,$0
-       JMP     ·subVW_g(SB)
-
  TEXT ·lshVU(SB),NOSPLIT,$0
         JMP     ·lshVU_g(SB)
  
diff --git a/src/math/big/arith_ppc64x.s b/src/math/big/arith_ppc64x.s

index a47ea83aa3144dcbbc58e9efe4d6a6ed7953fd6c..5392c1be26ed47dd4bcdd0030b2677606a6e7f95 100644 (file)
--- a/src/math/big/arith_ppc64x.s
+++ b/src/math/big/arith_ppc64x.s
@@ -188,157 +188,6 @@ done:
         MOVD  R4, c+72(FP)
         RET
  
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB), NOSPLIT, $0
-       MOVD z+0(FP), R10       // R10 = z[]
-       MOVD x+24(FP), R8       // R8 = x[]
-       MOVD y+48(FP), R4       // R4 = y = c
-       MOVD z_len+8(FP), R11   // R11 = z_len
-
-       CMP   R11, $0           // If z_len is zero, return
-       BEQ   done
-
-       // We will process the first iteration out of the loop so we capture
-       // the value of c. In the subsequent iterations, we will rely on the
-       // value of CA set here.
-       MOVD  0(R8), R20        // R20 = x[i]
-       ADD   $-1, R11          // R11 = z_len - 1
-       ADDC  R20, R4, R6       // R6 = x[i] + c
-       CMP   R11, $0           // If z_len was 1, we are done
-       MOVD  R6, 0(R10)        // z[i]
-       BEQ   final
-
-       // We will read 4 elements per iteration
-       SRDCC $2, R11, R9       // R9 = z_len/4
-       DCBT  (R8)
-       MOVD  R9, CTR           // Set up the loop counter
-       BEQ   tail              // If R9 = 0, we can't use the loop
-       PCALIGN $16
-
-loop:
-       MOVD  8(R8), R20        // R20 = x[i]
-       MOVD  16(R8), R21       // R21 = x[i+1]
-       MOVD  24(R8), R22       // R22 = x[i+2]
-       MOVDU 32(R8), R23       // R23 = x[i+3]
-       ADDZE R20, R24          // R24 = x[i] + CA
-       ADDZE R21, R25          // R25 = x[i+1] + CA
-       ADDZE R22, R26          // R26 = x[i+2] + CA
-       ADDZE R23, R27          // R27 = x[i+3] + CA
-       MOVD  R24, 8(R10)       // z[i]
-       MOVD  R25, 16(R10)      // z[i+1]
-       MOVD  R26, 24(R10)      // z[i+2]
-       MOVDU R27, 32(R10)      // z[i+3]
-       ADD   $-4, R11          // R11 = z_len - 4
-       BDNZ  loop
-
-       // We may have some elements to read
-       CMP R11, $0
-       BEQ final
-
-tail:
-       MOVDU 8(R8), R20
-       ADDZE R20, R24
-       ADD $-1, R11
-       MOVDU R24, 8(R10)
-       CMP R11, $0
-       BEQ final
-
-       MOVDU 8(R8), R20
-       ADDZE R20, R24
-       ADD $-1, R11
-       MOVDU R24, 8(R10)
-       CMP R11, $0
-       BEQ final
-
-       MOVD 8(R8), R20
-       ADDZE R20, R24
-       MOVD R24, 8(R10)
-
-final:
-       ADDZE R0, R4            // c = CA
-done:
-       MOVD  R4, c+56(FP)
-       RET
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB), NOSPLIT, $0
-       MOVD  z+0(FP), R10      // R10 = z[]
-       MOVD  x+24(FP), R8      // R8 = x[]
-       MOVD  y+48(FP), R4      // R4 = y = c
-       MOVD  z_len+8(FP), R11  // R11 = z_len
-
-       CMP   R11, $0           // If z_len is zero, return
-       BEQ   done
-
-       // We will process the first iteration out of the loop so we capture
-       // the value of c. In the subsequent iterations, we will rely on the
-       // value of CA set here.
-       MOVD  0(R8), R20        // R20 = x[i]
-       ADD   $-1, R11          // R11 = z_len - 1
-       SUBC  R4, R20, R6       // R6 = x[i] - c
-       CMP   R11, $0           // If z_len was 1, we are done
-       MOVD  R6, 0(R10)        // z[i]
-       BEQ   final
-
-       // We will read 4 elements per iteration
-       SRDCC $2, R11, R9       // R9 = z_len/4
-       DCBT  (R8)
-       MOVD  R9, CTR           // Set up the loop counter
-       BEQ   tail              // If R9 = 0, we can't use the loop
-
-       // The loop here is almost the same as the one used in s390x, but
-       // we don't need to capture CA every iteration because we've already
-       // done that above.
-
-       PCALIGN $16
-loop:
-       MOVD  8(R8), R20
-       MOVD  16(R8), R21
-       MOVD  24(R8), R22
-       MOVDU 32(R8), R23
-       SUBE  R0, R20
-       SUBE  R0, R21
-       SUBE  R0, R22
-       SUBE  R0, R23
-       MOVD  R20, 8(R10)
-       MOVD  R21, 16(R10)
-       MOVD  R22, 24(R10)
-       MOVDU R23, 32(R10)
-       ADD   $-4, R11
-       BDNZ  loop
-
-       // We may have some elements to read
-       CMP   R11, $0
-       BEQ   final
-
-tail:
-       MOVDU 8(R8), R20
-       SUBE  R0, R20
-       ADD   $-1, R11
-       MOVDU R20, 8(R10)
-       CMP   R11, $0
-       BEQ   final
-
-       MOVDU 8(R8), R20
-       SUBE  R0, R20
-       ADD   $-1, R11
-       MOVDU R20, 8(R10)
-       CMP   R11, $0
-       BEQ   final
-
-       MOVD  8(R8), R20
-       SUBE  R0, R20
-       MOVD  R20, 8(R10)
-
-final:
-       // Capture CA
-       SUBE  R4, R4
-       NEG   R4, R4
-
-done:
-       MOVD  R4, c+56(FP)
-       RET
-
  //func lshVU(z, x []Word, s uint) (c Word)
  TEXT ·lshVU(SB), NOSPLIT, $0
         MOVD    z+0(FP), R3
diff --git a/src/math/big/arith_riscv64.s b/src/math/big/arith_riscv64.s

index 1ba25ce3874dc115834dd3bdd38cd7072d7db35e..406cf38d1f73f58d6eebd934a0158cc7715fc41f 100644 (file)
--- a/src/math/big/arith_riscv64.s
+++ b/src/math/big/arith_riscv64.s
@@ -173,126 +173,6 @@ done:
         MOV     X29, c+72(FP)   // return b
         RET
  
-TEXT ·addVW(SB),NOSPLIT,$0
-       MOV     x+24(FP), X5
-       MOV     y+48(FP), X6
-       MOV     z+0(FP), X7
-       MOV     z_len+8(FP), X30
-
-       MOV     $4, X28
-       MOV     X6, X29         // c = y
-
-       BEQZ    X30, done
-       BLTU    X30, X28, loop1
-
-loop4:
-       MOV     0(X5), X8       // x[0]
-       MOV     8(X5), X11      // x[1]
-       MOV     16(X5), X14     // x[2]
-       MOV     24(X5), X17     // x[3]
-
-       ADD     X8, X29, X10    // z[0] = x[0] + c
-       SLTU    X8, X10, X29    // next c
-
-       ADD     X11, X29, X13   // z[1] = x[1] + c
-       SLTU    X11, X13, X29   // next c
-
-       ADD     X14, X29, X16   // z[2] = x[2] + c
-       SLTU    X14, X16, X29   // next c
-
-       ADD     X17, X29, X19   // z[3] = x[3] + c
-       SLTU    X17, X19, X29   // next c
-
-       MOV     X10, 0(X7)      // z[0]
-       MOV     X13, 8(X7)      // z[1]
-       MOV     X16, 16(X7)     // z[2]
-       MOV     X19, 24(X7)     // z[3]
-
-       ADD     $32, X5
-       ADD     $32, X7
-       SUB     $4, X30
-
-       BGEU    X30, X28, loop4
-       BEQZ    X30, done
-
-loop1:
-       MOV     0(X5), X10      // x
-
-       ADD     X10, X29, X12   // z = x + c
-       SLTU    X10, X12, X29   // next c
-
-       MOV     X12, 0(X7)      // z
-
-       ADD     $8, X5
-       ADD     $8, X7
-       SUB     $1, X30
-
-       BNEZ    X30, loop1
-
-done:
-       MOV     X29, c+56(FP)   // return c
-       RET
-
-TEXT ·subVW(SB),NOSPLIT,$0
-       MOV     x+24(FP), X5
-       MOV     y+48(FP), X6
-       MOV     z+0(FP), X7
-       MOV     z_len+8(FP), X30
-
-       MOV     $4, X28
-       MOV     X6, X29         // b = y
-
-       BEQZ    X30, done
-       BLTU    X30, X28, loop1
-
-loop4:
-       MOV     0(X5), X8       // x[0]
-       MOV     8(X5), X11      // x[1]
-       MOV     16(X5), X14     // x[2]
-       MOV     24(X5), X17     // x[3]
-
-       SUB     X29, X8, X10    // z[0] = x[0] - b
-       SLTU    X10, X8, X29    // next b
-
-       SUB     X29, X11, X13   // z[1] = x[1] - b
-       SLTU    X13, X11, X29   // next b
-
-       SUB     X29, X14, X16   // z[2] = x[2] - b
-       SLTU    X16, X14, X29   // next b
-
-       SUB     X29, X17, X19   // z[3] = x[3] - b
-       SLTU    X19, X17, X29   // next b
-
-       MOV     X10, 0(X7)      // z[0]
-       MOV     X13, 8(X7)      // z[1]
-       MOV     X16, 16(X7)     // z[2]
-       MOV     X19, 24(X7)     // z[3]
-
-       ADD     $32, X5
-       ADD     $32, X7
-       SUB     $4, X30
-
-       BGEU    X30, X28, loop4
-       BEQZ    X30, done
-
-loop1:
-       MOV     0(X5), X10      // x
-
-       SUB     X29, X10, X12   // z = x - b
-       SLTU    X12, X10, X29   // next b
-
-       MOV     X12, 0(X7)      // z
-
-       ADD     $8, X5
-       ADD     $8, X7
-       SUB     $1, X30
-
-       BNEZ    X30, loop1
-
-done:
-       MOV     X29, c+56(FP)   // return b
-       RET
-
  TEXT ·lshVU(SB),NOSPLIT,$0
         JMP ·lshVU_g(SB)
  
diff --git a/src/math/big/arith_s390x.s b/src/math/big/arith_s390x.s

index 57b263a4c3d2d8f621d438cc12b7837536576bac..a03660be6296c1b627c515e8bcf867d50091742f 100644 (file)
--- a/src/math/big/arith_s390x.s
+++ b/src/math/big/arith_s390x.s
@@ -500,188 +500,6 @@ E1:
         MOVD R4, c+72(FP) // return c
         RET
  
-TEXT ·addVW(SB), NOSPLIT, $0
-       MOVD z_len+8(FP), R5 // length of z
-       MOVD x+24(FP), R6
-       MOVD y+48(FP), R7    // c = y
-       MOVD z+0(FP), R8
-
-       CMPBEQ R5, $0, returnC // if len(z) == 0, we can have an early return
-
-       // Add the first two words, and determine which path (copy path or loop path) to take based on the carry flag.
-       ADDC   0(R6), R7
-       MOVD   R7, 0(R8)
-       CMPBEQ R5, $1, returnResult // len(z) == 1
-       MOVD   $0, R9
-       ADDE   8(R6), R9
-       MOVD   R9, 8(R8)
-       CMPBEQ R5, $2, returnResult // len(z) == 2
-
-       // Update the counters
-       MOVD $16, R12    // i = 2
-       MOVD $-2(R5), R5 // n = n - 2
-
-loopOverEachWord:
-       BRC  $12, copySetup // carry = 0, copy the rest
-       MOVD $1, R9
-
-       // Originally we used the carry flag generated in the previous iteration
-       // (i.e: ADDE could be used here to do the addition).  However, since we
-       // already know carry is 1 (otherwise we will go to copy section), we can use
-       // ADDC here so the current iteration does not depend on the carry flag
-       // generated in the previous iteration. This could be useful when branch prediction happens.
-       ADDC 0(R6)(R12*1), R9
-       MOVD R9, 0(R8)(R12*1) // z[i] = x[i] + c
-
-       MOVD  $8(R12), R12         // i++
-       BRCTG R5, loopOverEachWord // n--
-
-// Return the current carry value
-returnResult:
-       MOVD $0, R0
-       ADDE R0, R0
-       MOVD R0, c+56(FP)
-       RET
-
-// Update position of x(R6) and z(R8) based on the current counter value and perform copying.
-// With the assumption that x and z will not overlap with each other or x and z will
-// point to same memory region, we can use a faster version of copy using only MVC here.
-// In the following implementation, we have three copy loops, each copying a word, 4 words, and
-// 32 words at a time.  Via benchmarking, this implementation is faster than calling runtime·memmove.
-copySetup:
-       ADD R12, R6
-       ADD R12, R8
-
-       CMPBGE R5, $4, mediumLoop
-
-smallLoop:  // does a loop unrolling to copy word when n < 4
-       CMPBEQ R5, $0, returnZero
-       MVC    $8, 0(R6), 0(R8)
-       CMPBEQ R5, $1, returnZero
-       MVC    $8, 8(R6), 8(R8)
-       CMPBEQ R5, $2, returnZero
-       MVC    $8, 16(R6), 16(R8)
-
-returnZero:
-       MOVD $0, c+56(FP) // return 0 as carry
-       RET
-
-mediumLoop:
-       CMPBLT R5, $4, smallLoop
-       CMPBLT R5, $32, mediumLoopBody
-
-largeLoop:  // Copying 256 bytes at a time.
-       MVC    $256, 0(R6), 0(R8)
-       MOVD   $256(R6), R6
-       MOVD   $256(R8), R8
-       MOVD   $-32(R5), R5
-       CMPBGE R5, $32, largeLoop
-       BR     mediumLoop
-
-mediumLoopBody:  // Copying 32 bytes at a time
-       MVC    $32, 0(R6), 0(R8)
-       MOVD   $32(R6), R6
-       MOVD   $32(R8), R8
-       MOVD   $-4(R5), R5
-       CMPBGE R5, $4, mediumLoopBody
-       BR     smallLoop
-
-returnC:
-       MOVD R7, c+56(FP)
-       RET
-
-TEXT ·subVW(SB), NOSPLIT, $0
-       MOVD z_len+8(FP), R5
-       MOVD x+24(FP), R6
-       MOVD y+48(FP), R7    // The borrow bit passed in
-       MOVD z+0(FP), R8
-       MOVD $0, R0          // R0 is a temporary variable used during computation. Ensure it has zero in it.
-
-       CMPBEQ R5, $0, returnC // len(z) == 0, have an early return
-
-       // Subtract the first two words, and determine which path (copy path or loop path) to take based on the borrow flag
-       MOVD   0(R6), R9
-       SUBC   R7, R9
-       MOVD   R9, 0(R8)
-       CMPBEQ R5, $1, returnResult
-       MOVD   8(R6), R9
-       SUBE   R0, R9
-       MOVD   R9, 8(R8)
-       CMPBEQ R5, $2, returnResult
-
-       // Update the counters
-       MOVD $16, R12    // i = 2
-       MOVD $-2(R5), R5 // n = n - 2
-
-loopOverEachWord:
-       BRC  $3, copySetup    // no borrow, copy the rest
-       MOVD 0(R6)(R12*1), R9
-
-       // Originally we used the borrow flag generated in the previous iteration
-       // (i.e: SUBE could be used here to do the subtraction). However, since we
-       // already know borrow is 1 (otherwise we will go to copy section), we can
-       // use SUBC here so the current iteration does not depend on the borrow flag
-       // generated in the previous iteration. This could be useful when branch prediction happens.
-       SUBC $1, R9
-       MOVD R9, 0(R8)(R12*1) // z[i] = x[i] - 1
-
-       MOVD  $8(R12), R12         // i++
-       BRCTG R5, loopOverEachWord // n--
-
-// return the current borrow value
-returnResult:
-       SUBE R0, R0
-       NEG  R0, R0
-       MOVD R0, c+56(FP)
-       RET
-
-// Update position of x(R6) and z(R8) based on the current counter value and perform copying.
-// With the assumption that x and z will not overlap with each other or x and z will
-// point to same memory region, we can use a faster version of copy using only MVC here.
-// In the following implementation, we have three copy loops, each copying a word, 4 words, and
-// 32 words at a time. Via benchmarking, this implementation is faster than calling runtime·memmove.
-copySetup:
-       ADD R12, R6
-       ADD R12, R8
-
-       CMPBGE R5, $4, mediumLoop
-
-smallLoop:  // does a loop unrolling to copy word when n < 4
-       CMPBEQ R5, $0, returnZero
-       MVC    $8, 0(R6), 0(R8)
-       CMPBEQ R5, $1, returnZero
-       MVC    $8, 8(R6), 8(R8)
-       CMPBEQ R5, $2, returnZero
-       MVC    $8, 16(R6), 16(R8)
-
-returnZero:
-       MOVD $0, c+56(FP) // return 0 as borrow
-       RET
-
-mediumLoop:
-       CMPBLT R5, $4, smallLoop
-       CMPBLT R5, $32, mediumLoopBody
-
-largeLoop:  // Copying 256 bytes at a time
-       MVC    $256, 0(R6), 0(R8)
-       MOVD   $256(R6), R6
-       MOVD   $256(R8), R8
-       MOVD   $-32(R5), R5
-       CMPBGE R5, $32, largeLoop
-       BR     mediumLoop
-
-mediumLoopBody:  // Copying 32 bytes at a time
-       MVC    $32, 0(R6), 0(R8)
-       MOVD   $32(R6), R6
-       MOVD   $32(R8), R8
-       MOVD   $-4(R5), R5
-       CMPBGE R5, $4, mediumLoopBody
-       BR     smallLoop
-
-returnC:
-       MOVD R7, c+56(FP)
-       RET
-
  // func lshVU(z, x []Word, s uint) (c Word)
  TEXT ·lshVU(SB), NOSPLIT, $0
         BR ·lshVU_g(SB)
diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go

index b6e7304a132c8739618d7323d2a94e1f65ec4176..bd9f96870b1d6da459ebd0e4587a2b8d54e19d32 100644 (file)
--- a/src/math/big/arith_test.go
+++ b/src/math/big/arith_test.go
@@ -28,8 +28,8 @@ var shifts = []uint{1, 2, 3, _W/4 - 1, _W / 4, _W/4 + 1, _W/2 - 1, _W / 2, _W/2
  
  func TestAddVV(t *testing.T)      { testVV(t, "addVV", addVV, addVV_g) }
  func TestSubVV(t *testing.T)      { testVV(t, "subVV", subVV, subVV_g) }
-func TestAddVW(t *testing.T)      { testVW(t, "addVW", addVW, addVW_g, words4) }
-func TestSubVW(t *testing.T)      { testVW(t, "subVW", subVW, subVW_g, words4) }
+func TestAddVW(t *testing.T)      { testVW(t, "addVW", addVW, addVW_ref, words4) }
+func TestSubVW(t *testing.T)      { testVW(t, "subVW", subVW, subVW_ref, words4) }
  func TestLshVU(t *testing.T)      { testVU(t, "lshVU", lshVU, lshVU_g, shifts) }
  func TestRshVU(t *testing.T)      { testVU(t, "rshVU", rshVU, rshVU_g, shifts) }
  func TestMulAddVWW(t *testing.T)  { testVWW(t, "mulAddVWW", mulAddVWW, mulAddVWW_g, muls) }
@@ -865,21 +865,15 @@ func benchVV(fn func(z, x, y []Word) Word) benchFunc {
  }
  
  func BenchmarkAddVW(b *testing.B) {
-       bench(b, "/impl=asm/data=random", benchVW(addVW, 123))
-       bench(b, "/impl=asm/data=carry", benchCarryVW(addVW, ^Word(0), 1))
-       bench(b, "/impl=asm/data=shortcut", benchShortVW(addVW, 123))
-       bench(b, "/impl=go/data=random", benchVW(addVW_g, 123))
-       bench(b, "/impl=go/data=carry", benchCarryVW(addVW_g, ^Word(0), 1))
-       bench(b, "/impl=go/data=shortcut", benchShortVW(addVW_g, 123))
+       bench(b, "/data=random", benchVW(addVW, 123))
+       bench(b, "/data=carry", benchCarryVW(addVW, ^Word(0), 1))
+       bench(b, "/data=shortcut", benchShortVW(addVW, 123))
  }
  
  func BenchmarkSubVW(b *testing.B) {
-       bench(b, "/impl=asm/data=random", benchVW(subVW, 123))
-       bench(b, "/impl=asm/data=carry", benchCarryVW(subVW, 0, 1))
-       bench(b, "/impl=asm/data=shortcut", benchShortVW(subVW, 123))
-       bench(b, "/impl=go/data=random", benchVW(subVW_g, 123))
-       bench(b, "/impl=go/data=carry", benchCarryVW(subVW_g, 0, 1))
-       bench(b, "/impl=go/data=shortcut", benchShortVW(subVW_g, 123))
+       bench(b, "/data=random", benchVW(subVW, 123))
+       bench(b, "/data=carry", benchCarryVW(subVW, 0, 1))
+       bench(b, "/data=shortcut", benchShortVW(subVW, 123))
  }
  
  func benchVW(fn func(z, x []Word, w Word) Word, w Word) benchFunc {
diff --git a/src/math/big/arith_wasm.s b/src/math/big/arith_wasm.s

index 8aadeaa28d897bad7a4958cd5c8dff2c1de6d0af..3a9aa4ddcb2dca330e6c1e3433f020751e307101 100644 (file)
--- a/src/math/big/arith_wasm.s
+++ b/src/math/big/arith_wasm.s
@@ -12,12 +12,6 @@ TEXT ·addVV(SB),NOSPLIT,$0
  TEXT ·subVV(SB),NOSPLIT,$0
         JMP ·subVV_g(SB)
  
-TEXT ·addVW(SB),NOSPLIT,$0
-       JMP ·addVW_g(SB)
-
-TEXT ·subVW(SB),NOSPLIT,$0
-       JMP ·subVW_g(SB)
-
  TEXT ·lshVU(SB),NOSPLIT,$0
         JMP ·lshVU_g(SB)
author	Russ Cox <rsc@golang.org>
	Mon, 7 Apr 2025 21:13:20 +0000 (17:13 -0400)
committer	Russ Cox <rsc@golang.org>
	Fri, 18 Apr 2025 22:07:59 +0000 (15:07 -0700)
src/cmd/compile/internal/test/inl_test.go		patch \| blob \| history
src/math/big/arith.go		patch \| blob \| history
src/math/big/arith_386.s		patch \| blob \| history
src/math/big/arith_amd64.s		patch \| blob \| history
src/math/big/arith_arm.s		patch \| blob \| history
src/math/big/arith_arm64.s		patch \| blob \| history
src/math/big/arith_decl.go		patch \| blob \| history
src/math/big/arith_decl_pure.go		patch \| blob \| history
src/math/big/arith_loong64.s		patch \| blob \| history
src/math/big/arith_mips64x.s		patch \| blob \| history
src/math/big/arith_mipsx.s		patch \| blob \| history
src/math/big/arith_ppc64x.s		patch \| blob \| history
src/math/big/arith_riscv64.s		patch \| blob \| history
src/math/big/arith_s390x.s		patch \| blob \| history
src/math/big/arith_test.go		patch \| blob \| history
src/math/big/arith_wasm.s		patch \| blob \| history