}
-// ----------------------------------------------------------------------------
-// Elementary operations on vectors
-
-// All higher-level functions use these elementary vector operations.
-// The function pointers f are initialized with default implementations
-// f_g, written in Go for portability. The corresponding assembly routines
-// f_s should be installed if they exist.
-var (
- // addVV sets z and returns c such that z+c = x+y.
- addVV func(z, x, y *Word, n int) (c Word) = addVV_g
-
- // subVV sets z and returns c such that z-c = x-y.
- subVV func(z, x, y *Word, n int) (c Word) = subVV_g
-
- // addVW sets z and returns c such that z+c = x+y.
- addVW func(z, x *Word, y Word, n int) (c Word) = addVW_g
-
- // subVW sets z and returns c such that z-c = x-y.
- subVW func(z, x *Word, y Word, n int) (c Word) = subVW_g
-
- // mulAddVWW sets z and returns c such that z+c = x*y + r.
- mulAddVWW func(z, x *Word, y, r Word, n int) (c Word) = mulAddVWW_g
-
- // addMulVVW sets z and returns c such that z+c = z + x*y.
- addMulVVW func(z, x *Word, y Word, n int) (c Word) = addMulVVW_g
-
- // divWVW sets z and returns r such that z-r = (xn<<(n*_W) + x) / y.
- divWVW func(z *Word, xn Word, x *Word, y Word, n int) (r Word) = divWVW_g
-)
-
-
-func init() {
- // Uncomment to use generic routines.
- //return;
-
- // Install assembly routines.
- addVV = addVV_s
- subVV = subVV_s
- addVW = addVW_s
- subVW = subVW_s
- mulAddVWW = mulAddVWW_s
- addMulVVW = addMulVVW_s
- divWVW = divWVW_s
-}
-
-
// at returns the address of p advanced by i*_S bytes, reinterpreted as a
// *Word. When p points at the first Word of a vector this is the address of
// element i (assuming _S is the size of a Word in bytes; _S is declared
// outside this view, TODO confirm).
//
// No bounds checking is performed; the caller must guarantee that the
// resulting address lies within the same allocation as p.
//
// The pointer -> uintptr -> pointer round trip is deliberately written as one
// expression: the unsafe package only sanctions pointer arithmetic when both
// conversions appear in the same expression, so do not split it into
// intermediate uintptr variables.
func (p *Word) at(i int) *Word {
return (*Word)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + uintptr(i)*_S))
}
-func addVV_s(z, x, y *Word, n int) (c Word)
+func addVV(z, x, y *Word, n int) (c Word)
func addVV_g(z, x, y *Word, n int) (c Word) {
for i := 0; i < n; i++ {
c, *z.at(i) = addWW_g(*x.at(i), *y.at(i), c)
}
-func subVV_s(z, x, y *Word, n int) (c Word)
+func subVV(z, x, y *Word, n int) (c Word)
func subVV_g(z, x, y *Word, n int) (c Word) {
for i := 0; i < n; i++ {
c, *z.at(i) = subWW_g(*x.at(i), *y.at(i), c)
}
-func addVW_s(z, x *Word, y Word, n int) (c Word)
+func addVW(z, x *Word, y Word, n int) (c Word)
func addVW_g(z, x *Word, y Word, n int) (c Word) {
c = y
for i := 0; i < n; i++ {
}
-func subVW_s(z, x *Word, y Word, n int) (c Word)
+func subVW(z, x *Word, y Word, n int) (c Word)
func subVW_g(z, x *Word, y Word, n int) (c Word) {
c = y
for i := 0; i < n; i++ {
}
-func mulAddVWW_s(z, x *Word, y, r Word, n int) (c Word)
+func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
func mulAddVWW_g(z, x *Word, y, r Word, n int) (c Word) {
c = r
for i := 0; i < n; i++ {
}
-func addMulVVW_s(z, x *Word, y Word, n int) (c Word)
+func addMulVVW(z, x *Word, y Word, n int) (c Word)
func addMulVVW_g(z, x *Word, y Word, n int) (c Word) {
for i := 0; i < n; i++ {
z1, z0 := mulAddWWW_g(*x.at(i), y, *z.at(i))
}
-func divWVW_s(z *Word, xn Word, x *Word, y Word, n int) (r Word)
+func divWVW(z *Word, xn Word, x *Word, y Word, n int) (r Word)
func divWVW_g(z *Word, xn Word, x *Word, y Word, n int) (r Word) {
r = xn
for i := n - 1; i >= 0; i-- {
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
-// func addVV_s(z, x, y *Word, n int) (c Word)
-TEXT ·addVV_s(SB),7,$0
+// func addVV(z, x, y *Word, n int) (c Word)
+TEXT ·addVV(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), CX
RET
-// func subVV_s(z, x, y *Word, n int) (c Word)
-// (same as addVV_s except for SBBL instead of ADCL and label names)
-TEXT ·subVV_s(SB),7,$0
+// func subVV(z, x, y *Word, n int) (c Word)
+// (same as addVV except for SBBL instead of ADCL and label names)
+TEXT ·subVV(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), CX
RET
-// func addVW_s(z, x *Word, y Word, n int) (c Word)
-TEXT ·addVW_s(SB),7,$0
+// func addVW(z, x *Word, y Word, n int) (c Word)
+TEXT ·addVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), AX // c = y
RET
-// func subVW_s(z, x *Word, y Word, n int) (c Word)
-TEXT ·subVW_s(SB),7,$0
+// func subVW(z, x *Word, y Word, n int) (c Word)
+TEXT ·subVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), AX // c = y
RET
-// func mulAddVWW_s(z, x *Word, y, r Word, n int) (c Word)
-TEXT ·mulAddVWW_s(SB),7,$0
+// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
+TEXT ·mulAddVWW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), BP
RET
-// func addMulVVW_s(z, x *Word, y Word, n int) (c Word)
-TEXT ·addMulVVW_s(SB),7,$0
+// func addMulVVW(z, x *Word, y Word, n int) (c Word)
+TEXT ·addMulVVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+4(FP), SI
MOVL y+8(FP), BP
RET
-// divWVW_s(z* Word, xn Word, x *Word, y Word, n int) (r Word)
-TEXT ·divWVW_s(SB),7,$0
+// func divWVW(z *Word, xn Word, x *Word, y Word, n int) (r Word)
+TEXT ·divWVW(SB),7,$0
MOVL z+0(FP), DI
MOVL xn+4(FP), DX // r = xn
MOVL x+8(FP), SI
// TODO(gri) - experiment with unrolled loops for faster execution
-// func addVV_s(z, x, y *Word, n int) (c Word)
-TEXT ·addVV_s(SB),7,$0
+// func addVV(z, x, y *Word, n int) (c Word)
+TEXT ·addVV(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ y+16(FP), R9
RET
-// func subVV_s(z, x, y *Word, n int) (c Word)
+// func subVV(z, x, y *Word, n int) (c Word)
// (same as addVV except for SBBQ instead of ADCQ and label names)
-TEXT ·subVV_s(SB),7,$0
+TEXT ·subVV(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ y+16(FP), R9
RET
-// func addVW_s(z, x *Word, y Word, n int) (c Word)
-TEXT ·addVW_s(SB),7,$0
+// func addVW(z, x *Word, y Word, n int) (c Word)
+TEXT ·addVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ y+16(FP), AX // c = y
RET
-// func subVW_s(z, x *Word, y Word, n int) (c Word)
-TEXT ·subVW_s(SB),7,$0
+// func subVW(z, x *Word, y Word, n int) (c Word)
+TEXT ·subVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ y+16(FP), AX // c = y
RET
-// func mulAddVWW_s(z, x *Word, y, r Word, n int) (c Word)
-TEXT ·mulAddVWW_s(SB),7,$0
+// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
+TEXT ·mulAddVWW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ y+16(FP), R9
RET
-// func addMulVVW_s(z, x *Word, y Word, n int) (c Word)
-TEXT ·addMulVVW_s(SB),7,$0
+// func addMulVVW(z, x *Word, y Word, n int) (c Word)
+TEXT ·addMulVVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ x+8(FP), R8
MOVQ y+16(FP), R9
RET
-// divWVW_s(z* Word, xn Word, x *Word, y Word, n int) (r Word)
-TEXT ·divWVW_s(SB),7,$0
+// func divWVW(z *Word, xn Word, x *Word, y Word, n int) (r Word)
+TEXT ·divWVW(SB),7,$0
MOVQ z+0(FP), R10
MOVQ xn+8(FP), DX // r = xn
MOVQ x+16(FP), R8
// arithmetic operations on vectors implemented in arith.go.
// TODO(gri) Implement these routines.
-TEXT ·addVV_s(SB),7,$0
+TEXT ·addVV(SB),7,$0
B ·addVV_g(SB)
-TEXT ·subVV_s(SB),7,$0
+TEXT ·subVV(SB),7,$0
B ·subVV_g(SB)
-TEXT ·addVW_s(SB),7,$0
+TEXT ·addVW(SB),7,$0
B ·addVW_g(SB)
-TEXT ·subVW_s(SB),7,$0
+TEXT ·subVW(SB),7,$0
B ·subVW_g(SB)
-TEXT ·mulAddVWW_s(SB),7,$0
+TEXT ·mulAddVWW(SB),7,$0
B ·mulAddVWW_g(SB)
-TEXT ·addMulVVW_s(SB),7,$0
+TEXT ·addMulVVW(SB),7,$0
B ·addMulVVW_g(SB)
-TEXT ·divWVW_s(SB),7,$0
+TEXT ·divWVW(SB),7,$0
B ·divWVW_g(SB)