--- /dev/null
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT big·useAsm(SB),7,$0
+ MOVB $0, 4(SP) // assembly routines disabled: stores 0 in the byte at 4(SP) (presumably the bool result slot; confirm against the Go declaration)
+ RET
+
+
+// TODO(gri) Implement these routines and enable them.
+TEXT big·addVV_s(SB),7,$0 // placeholder: no instructions of its own yet
+TEXT big·subVV_s(SB),7,$0 // placeholder
+TEXT big·addVW_s(SB),7,$0 // placeholder
+TEXT big·subVW_s(SB),7,$0 // placeholder
+TEXT big·mulAddVWW_s(SB),7,$0 // placeholder
+TEXT big·addMulVVW_s(SB),7,$0 // placeholder
+TEXT big·divWVW_s(SB),7,$0 // placeholder
+ RET // the only instruction in this group; no result is written
+
+
+// TODO(gri) Implement this routine completely in Go.
+// At the moment we need this assembly version.
+TEXT big·divWWW_s(SB),7,$0
+ MOVL a+0(FP), DX // DX = first argument: high word of the dividend
+ MOVL a+4(FP), AX // AX = second argument: low word of the dividend
+ DIVL a+8(FP) // divide DX:AX by the third argument; quotient -> AX, remainder -> DX
+ MOVL AX, a+12(FP) // first result = quotient
+ MOVL DX, a+16(FP) // second result = remainder
+ RET
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// This file provides fast assembly versions of the routines in arith.go.
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
TEXT big·useAsm(SB),7,$0
- MOVB $1, 8(SP)
+ MOVB $1, 8(SP) // assembly routines enabled: stores 1 in the byte at 8(SP) (presumably the bool result slot; confirm against the Go declaration)
RET
-// ----------------------------------------------------------------------------
-// Elementary operations on words
-
-// func addWW_s(x, y, c Word) (z1, z0 Word)
-// z1<<_W + z0 = x+y+c, with c == 0 or 1
-TEXT big·addWW_s(SB),7,$0
- MOVQ a+0(FP), AX // AX = x
- XORQ DX, DX // DX = 0; collects the carries
- ADDQ a+8(FP), AX // AX = x + y (low word)
- ADCQ $0, DX // DX += carry out of x + y
- ADDQ a+16(FP), AX // AX += c
- ADCQ $0, DX // DX += carry out of adding c
- MOVQ DX, a+24(FP) // z1 = accumulated carry
- MOVQ AX, a+32(FP) // z0 = low word of the sum
- RET
-
-
-// func subWW_s(x, y, c Word) (z1, z0 Word)
-// z0 - z1<<_W = x-y-c, with c == 0 or 1 (z1 is the borrow)
-TEXT big·subWW_s(SB),7,$0
- MOVQ a+0(FP), AX // AX = x
- XORQ DX, DX // DX = 0; collects the borrows
- SUBQ a+8(FP), AX // AX = x - y (low word); carry flag set on borrow
- ADCQ $0, DX // DX += borrow out of x - y
- SUBQ a+16(FP), AX // AX -= c
- ADCQ $0, DX // DX += borrow out of subtracting c
- MOVQ DX, a+24(FP) // z1 = accumulated borrow
- MOVQ AX, a+32(FP) // z0 = low word of the difference
- RET
-
-
-// func mulAddWWW_s(x, y, c Word) (z1, z0 Word)
-// z1<<64 + z0 = x*y + c
-//
-TEXT big·mulAddWWW_s(SB),7,$0
- MOVQ a+0(FP), AX // AX = x
- MULQ a+8(FP) // DX:AX = x * y (unsigned 128-bit product)
- ADDQ a+16(FP), AX // add c to the low word
- ADCQ $0, DX // propagate the carry into the high word
- MOVQ DX, a+24(FP) // z1 = high word
- MOVQ AX, a+32(FP) // z0 = low word
- RET
-
-
-// func divWWW_s(x1, x0, y Word) (q, r Word)
-// x1<<64 + x0 = q*y + r, with r < y
-//
-TEXT big·divWWW_s(SB),7,$0
- MOVQ a+0(FP), DX // DX = x1 (high word of the dividend)
- MOVQ a+8(FP), AX // AX = x0 (low word of the dividend)
- DIVQ a+16(FP) // divide DX:AX by y; quotient -> AX, remainder -> DX (faults if y == 0 or the quotient overflows 64 bits)
- MOVQ AX, a+24(FP) // q = quotient
- MOVQ DX, a+32(FP) // r = remainder
- RET
-
-
-// ----------------------------------------------------------------------------
-// Elementary operations on vectors
-
// TODO(gri) - experiment with unrolled loops for faster execution
// func addVV_s(z, x, y *Word, n int) (c Word)
MOVQ DX, a+40(FP) // return r
RET
+
+
+// TODO(gri) Implement this routine completely in Go.
+// At the moment we need this assembly version.
+TEXT big·divWWW_s(SB),7,$0
+ MOVQ a+0(FP), DX // DX = first argument: high word of the dividend
+ MOVQ a+8(FP), AX // AX = second argument: low word of the dividend
+ DIVQ a+16(FP) // divide DX:AX by the third argument; quotient -> AX, remainder -> DX
+ MOVQ AX, a+24(FP) // first result = quotient
+ MOVQ DX, a+32(FP) // second result = remainder
+ RET
for _, a := range sumWW {
arg := a; // table entry used unchanged for the addition check
testFunWW(t, "addWW_g", addWW_g, arg);
- testFunWW(t, "addWW_s", addWW_s, arg);
arg = argWW{a.y, a.x, a.c, a.z1, a.z0}; // same entry with x and y exchanged
testFunWW(t, "addWW_g symmetric", addWW_g, arg);
- testFunWW(t, "addWW_s symmetric", addWW_s, arg);
arg = argWW{a.z0, a.x, a.c, a.z1, a.y}; // entry rearranged for subtraction: z0 and y trade places
testFunWW(t, "subWW_g", subWW_g, arg);
- testFunWW(t, "subWW_s", subWW_s, arg);
arg = argWW{a.z0, a.y, a.c, a.z1, a.x}; // subtraction again, this time with z0 and x trading places
testFunWW(t, "subWW_g symmetric", subWW_g, arg);
- testFunWW(t, "subWW_s symmetric", subWW_s, arg);
}
}
for _, a := range sumVV {
arg := a; // table entry used unchanged for the addition check
testFunVV(t, "addVV_g", addVV_g, arg);
- testFunVV(t, "addVV_s", addVV_s, arg);
+ testFunVV(t, "addVV", addVV, arg);
arg = argVV{a.z, a.y, a.x, a.c}; // same entry with x and y exchanged
testFunVV(t, "addVV_g symmetric", addVV_g, arg);
- testFunVV(t, "addVV_s symmetric", addVV_s, arg);
+ testFunVV(t, "addVV symmetric", addVV, arg);
arg = argVV{a.x, a.z, a.y, a.c}; // entry rearranged for subtraction: z and x trade places
testFunVV(t, "subVV_g", subVV_g, arg);
- testFunVV(t, "subVV_s", subVV_s, arg);
+ testFunVV(t, "subVV", subVV, arg);
arg = argVV{a.y, a.z, a.x, a.c}; // subtraction again, this time built from y, z, x
testFunVV(t, "subVV_g symmetric", subVV_g, arg);
- testFunVV(t, "subVV_s symmetric", subVV_s, arg);
+ testFunVV(t, "subVV symmetric", subVV, arg);
}
}
for _, a := range sumVW {
arg := a; // table entry used unchanged for the addition check
testFunVW(t, "addVW_g", addVW_g, arg);
- testFunVW(t, "addVW_s", addVW_s, arg);
+ testFunVW(t, "addVW", addVW, arg);
arg = argVW{a.x, a.z, a.y, a.c}; // entry rearranged for subtraction: z and x trade places
testFunVW(t, "subVW_g", subVW_g, arg);
- testFunVW(t, "subVW_s", subVW_s, arg);
+ testFunVW(t, "subVW", subVW, arg);
}
}
for _, a := range prodVWW {
arg := a; // table entry used unchanged for the multiply-add check
testFunVWW(t, "mulAddVWW_g", mulAddVWW_g, arg);
- testFunVWW(t, "mulAddVWW_s", mulAddVWW_s, arg);
+ testFunVWW(t, "mulAddVWW", mulAddVWW, arg);
if a.y != 0 && a.r < a.y { // reuse the entry as a division case only when y is nonzero and r < y
arg := argWVW{a.x, a.c, a.z, a.y, a.r}; // division case assembled from the same entry's fields
testFunWVW(t, "divWVW_g", divWVW_g, arg);
- testFunWVW(t, "divWVW_s", divWVW_s, arg);
+ testFunWVW(t, "divWVW", divWVW, arg);
}
}
}