#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
-#define T0 1.0
-#define T1 0.5
-#define T2 1.6666666666666666667e-1
-#define T3 4.1666666666666666667e-2
-#define T4 8.3333333333333333333e-3
-#define T5 1.3888888888888888889e-3
-#define T6 1.9841269841269841270e-4
-#define T7 2.4801587301587301587e-5
#define PosInf 0x7FF0000000000000
#define NegInf 0xFFF0000000000000
#define Overflow 7.09782712893384e+02
+DATA exprodata<>+0(SB)/8, $0.5
+DATA exprodata<>+8(SB)/8, $1.0
+DATA exprodata<>+16(SB)/8, $2.0
+DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1
+DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2
+DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3
+DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3
+DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4
+DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5
+GLOBL exprodata<>+0(SB), RODATA, $72
+
// func Exp(x float64) float64
TEXT ·Exp(SB),NOSPLIT,$0
// test bits for not-finite
MULSD X0, X1
CVTSD2SL X1, BX // BX = exponent
CVTSL2SD BX, X1
+ CMPB ·useFMA(SB), $1
+ JE avxfma
MOVSD $LN2U, X2
MULSD X1, X2
SUBSD X2, X0
// reduce argument
MULSD $0.0625, X0
// Taylor series evaluation
- MOVSD $T7, X1
+ MOVSD exprodata<>+64(SB), X1
MULSD X0, X1
- ADDSD $T6, X1
+ ADDSD exprodata<>+56(SB), X1
MULSD X0, X1
- ADDSD $T5, X1
+ ADDSD exprodata<>+48(SB), X1
MULSD X0, X1
- ADDSD $T4, X1
+ ADDSD exprodata<>+40(SB), X1
MULSD X0, X1
- ADDSD $T3, X1
+ ADDSD exprodata<>+32(SB), X1
MULSD X0, X1
- ADDSD $T2, X1
+ ADDSD exprodata<>+24(SB), X1
MULSD X0, X1
- ADDSD $T1, X1
+ ADDSD exprodata<>+0(SB), X1
MULSD X0, X1
- ADDSD $T0, X1
+ ADDSD exprodata<>+8(SB), X1
MULSD X1, X0
- MOVSD $2.0, X1
+ MOVSD exprodata<>+16(SB), X1
ADDSD X0, X1
MULSD X1, X0
- MOVSD $2.0, X1
+ MOVSD exprodata<>+16(SB), X1
ADDSD X0, X1
MULSD X1, X0
- MOVSD $2.0, X1
+ MOVSD exprodata<>+16(SB), X1
ADDSD X0, X1
MULSD X1, X0
- MOVSD $2.0, X1
+ MOVSD exprodata<>+16(SB), X1
ADDSD X0, X1
MULSD X1, X0
- ADDSD $1.0, X0
+ ADDSD exprodata<>+8(SB), X0
// return fr * 2**exponent
+lastStep:
MOVL $0x3FF, AX // bias
ADDL AX, BX
JLE underflow
notNegInf: // NaN or +Inf, return x
MOVQ BX, ret+8(FP)
RET
+
+avxfma:
+ MOVSD $LN2U, X2
+ VFNMADD231SD X2, X1, X0
+ MOVSD $LN2L, X2
+ VFNMADD231SD X2, X1, X0
+ // reduce argument
+ MULSD $0.0625, X0
+ // Taylor series evaluation
+ MOVSD exprodata<>+64(SB), X1
+ VFMADD213SD exprodata<>+56(SB), X0, X1
+ VFMADD213SD exprodata<>+48(SB), X0, X1
+ VFMADD213SD exprodata<>+40(SB), X0, X1
+ VFMADD213SD exprodata<>+32(SB), X0, X1
+ VFMADD213SD exprodata<>+24(SB), X0, X1
+ VFMADD213SD exprodata<>+0(SB), X0, X1
+ VFMADD213SD exprodata<>+8(SB), X0, X1
+ MULSD X1, X0
+ VADDSD exprodata<>+16(SB), X0, X1
+ MULSD X1, X0
+ VADDSD exprodata<>+16(SB), X0, X1
+ MULSD X1, X0
+ VADDSD exprodata<>+16(SB), X0, X1
+ MULSD X1, X0
+ VADDSD exprodata<>+16(SB), X0, X1
+ VFMADD213SD exprodata<>+8(SB), X1, X0
+ JMP lastStep