math/big: Replace RCLQ + ANDQ with SETCS in unrolled arithmetic assembly.

author Christopher Swenson <cswenson@google.com>

Tue, 28 Aug 2012 16:29:45 +0000 (09:29 -0700)

committer Robert Griesemer <gri@golang.org>

Tue, 28 Aug 2012 16:29:45 +0000 (09:29 -0700)
author Christopher Swenson <cswenson@google.com>
Tue, 28 Aug 2012 16:29:45 +0000 (09:29 -0700)
committer Robert Griesemer <gri@golang.org>
Tue, 28 Aug 2012 16:29:45 +0000 (09:29 -0700)
diff --git a/src/pkg/math/big/arith_amd64.s b/src/pkg/math/big/arith_amd64.s

index 2d10793e63e4932879f23885557e83e43fab45fa..d4d2d2f1b1dad27d0cc0e74a076f6880fc706950 100644 (file)
--- a/src/pkg/math/big/arith_amd64.s
+++ b/src/pkg/math/big/arith_amd64.s
@@ -5,6 +5,16 @@
  // This file provides fast assembly versions for the elementary
  // arithmetic operations on vectors implemented in arith.go.
  
+// ZERO_CX sets CX to 0 while leaving CF intact; the bytes below are the literal encoding of MOVQ $0, CX.
+// (Written as MOVQ $0, reg, the assembler translates it to XORQ reg, reg, which clears CF and would lose the carry.)
+#define ZERO_CX BYTE $0x48; \
+               BYTE $0xc7; \
+               BYTE $0xc1; \
+               BYTE $0x00; \
+               BYTE $0x00; \
+               BYTE $0x00; \
+               BYTE $0x00
+
  // func mulWW(x, y Word) (z1, z0 Word)
  TEXT ·mulWW(SB),7,$0
         MOVQ x+0(FP), AX
@@ -137,7 +147,7 @@ TEXT ·addVW(SB),7,$0
         MOVQ x+16(FP), R8
         MOVQ y+32(FP), CX       // c = y
         MOVQ z+0(FP), R10
-       
+
         MOVQ $0, SI             // i = 0
  
         // s/JL/JMP/ below to disable the unrolled loop
@@ -151,15 +161,15 @@ U3:       // n >= 0
         MOVQ 16(R8)(SI*8), R13
         MOVQ 24(R8)(SI*8), R14
         ADDQ CX, R11
+       ZERO_CX
         ADCQ $0, R12
         ADCQ $0, R13
         ADCQ $0, R14
+       SETCS CX                // c = CF
         MOVQ R11, 0(R10)(SI*8)
         MOVQ R12, 8(R10)(SI*8)
         MOVQ R13, 16(R10)(SI*8)
         MOVQ R14, 24(R10)(SI*8)
-       RCLQ $1, CX             // c = CF
-       ANDQ $1, CX
  
         ADDQ $4, SI             // i += 4
         SUBQ $4, DI             // n -= 4
@@ -171,8 +181,8 @@ V3: ADDQ $4, DI             // n += 4
  L3:    // n > 0
         ADDQ 0(R8)(SI*8), CX
         MOVQ CX, 0(R10)(SI*8)
+       ZERO_CX
         RCLQ $1, CX             // c = CF
-       ANDQ $1, CX
  
         ADDQ $1, SI             // i++
         SUBQ $1, DI             // n--
@@ -203,15 +213,15 @@ U4:       // n >= 0
         MOVQ 16(R8)(SI*8), R13
         MOVQ 24(R8)(SI*8), R14
         SUBQ CX, R11
+       ZERO_CX
         SBBQ $0, R12
         SBBQ $0, R13
         SBBQ $0, R14
+       SETCS CX                // c = CF
         MOVQ R11, 0(R10)(SI*8)
         MOVQ R12, 8(R10)(SI*8)
         MOVQ R13, 16(R10)(SI*8)
         MOVQ R14, 24(R10)(SI*8)
-       RCLQ $1, CX             // c = CF
-       ANDQ $1, CX
  
         ADDQ $4, SI             // i += 4
         SUBQ $4, DI             // n -= 4
@@ -224,8 +234,8 @@ L4: // n > 0
         MOVQ 0(R8)(SI*8), R11
         SUBQ CX, R11
         MOVQ R11, 0(R10)(SI*8)
+       ZERO_CX
         RCLQ $1, CX             // c = CF
-       ANDQ $1, CX
  
         ADDQ $1, SI             // i++
         SUBQ $1, DI             // n--
author	Christopher Swenson <cswenson@google.com>
	Tue, 28 Aug 2012 16:29:45 +0000 (09:29 -0700)
committer	Robert Griesemer <gri@golang.org>
	Tue, 28 Aug 2012 16:29:45 +0000 (09:29 -0700)